]> CyberLeo.Net >> Repos - FreeBSD/releng/8.1.git/blob - contrib/bind9/lib/dns/rbtdb.c
Copy stable/8 to releng/8.1 in preparation for 8.1-RC1.
[FreeBSD/releng/8.1.git] / contrib / bind9 / lib / dns / rbtdb.c
1 /*
2  * Copyright (C) 2004-2010  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id: rbtdb.c,v 1.270.12.16.8.3 2010/02/26 00:24:39 marka Exp $ */
19
20 /*! \file */
21
22 /*
23  * Principal Author: Bob Halley
24  */
25
26 #include <config.h>
27
28 /* #define inline */
29
30 #include <isc/event.h>
31 #include <isc/heap.h>
32 #include <isc/mem.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
41 #include <isc/task.h>
42 #include <isc/time.h>
43 #include <isc/util.h>
44
45 #include <dns/acache.h>
46 #include <dns/db.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
50 #include <dns/lib.h>
51 #include <dns/log.h>
52 #include <dns/masterdump.h>
53 #include <dns/nsec.h>
54 #include <dns/nsec3.h>
55 #include <dns/rbt.h>
56 #include <dns/rdata.h>
57 #include <dns/rdataset.h>
58 #include <dns/rdatasetiter.h>
59 #include <dns/rdataslab.h>
60 #include <dns/rdatastruct.h>
61 #include <dns/result.h>
62 #include <dns/stats.h>
63 #include <dns/view.h>
64 #include <dns/zone.h>
65 #include <dns/zonekey.h>
66
67 #ifdef DNS_RBTDB_VERSION64
68 #include "rbtdb64.h"
69 #else
70 #include "rbtdb.h"
71 #endif
72
73 #ifdef DNS_RBTDB_VERSION64
74 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '8')
75 #else
76 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '4')
77 #endif
78
79 /*%
80  * Note that "impmagic" is not the first four bytes of the struct, so
81  * ISC_MAGIC_VALID cannot be used.
82  */
83 #define VALID_RBTDB(rbtdb)      ((rbtdb) != NULL && \
84                                  (rbtdb)->common.impmagic == RBTDB_MAGIC)
85
86 #ifdef DNS_RBTDB_VERSION64
87 typedef isc_uint64_t                    rbtdb_serial_t;
88 /*%
89  * Make casting easier in symbolic debuggers by using different names
90  * for the 64 bit version.
91  */
92 #define dns_rbtdb_t dns_rbtdb64_t
93 #define rdatasetheader_t rdatasetheader64_t
94 #define rbtdb_version_t rbtdb_version64_t
95 #else
96 typedef isc_uint32_t                    rbtdb_serial_t;
97 #endif
98
99 typedef isc_uint32_t                    rbtdb_rdatatype_t;
100
/*
 * An rbtdb_rdatatype_t packs two 16-bit values into one word: the "base"
 * type in the low 16 bits and the "extended" type in the high 16 bits.
 *
 * RBTDB_RDATATYPE_VALUE converts its operands to rbtdb_rdatatype_t
 * *before* shifting.  An operand narrower than int would otherwise be
 * promoted to (signed) int, and shifting a value >= 0x8000 left by 16
 * in that type overflows, which is undefined behaviour.
 */
#define RBTDB_RDATATYPE_BASE(type)      ((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)       ((dns_rdatatype_t)((type) >> 16))
#define RBTDB_RDATATYPE_VALUE(b, e) \
                (((rbtdb_rdatatype_t)(e) << 16) | (rbtdb_rdatatype_t)(b))
104
105 #define RBTDB_RDATATYPE_SIGNSEC \
106                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
107 #define RBTDB_RDATATYPE_SIGNSEC3 \
108                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
109 #define RBTDB_RDATATYPE_SIGNS \
110                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
111 #define RBTDB_RDATATYPE_SIGCNAME \
112                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
113 #define RBTDB_RDATATYPE_SIGDNAME \
114                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
115 #define RBTDB_RDATATYPE_NCACHEANY \
116                 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
117
118 /*
119  * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
120  * Using rwlock is effective with regard to lookup performance only when
121  * it is implemented in an efficient way.
122  * Otherwise, it is generally wise to stick to the simple locking since rwlock
123  * would require more memory or can even make lookups slower due to its own
124  * overhead (when it internally calls mutex locks).
125  */
126 #ifdef ISC_RWLOCK_USEATOMIC
127 #define DNS_RBTDB_USERWLOCK 1
128 #else
129 #define DNS_RBTDB_USERWLOCK 0
130 #endif
131
132 #if DNS_RBTDB_USERWLOCK
133 #define RBTDB_INITLOCK(l)       isc_rwlock_init((l), 0, 0)
134 #define RBTDB_DESTROYLOCK(l)    isc_rwlock_destroy(l)
135 #define RBTDB_LOCK(l, t)        RWLOCK((l), (t))
136 #define RBTDB_UNLOCK(l, t)      RWUNLOCK((l), (t))
137 #else
138 #define RBTDB_INITLOCK(l)       isc_mutex_init(l)
139 #define RBTDB_DESTROYLOCK(l)    DESTROYLOCK(l)
140 #define RBTDB_LOCK(l, t)        LOCK(l)
141 #define RBTDB_UNLOCK(l, t)      UNLOCK(l)
142 #endif
143
144 /*
145  * Since node locking is sensitive to both performance and memory footprint,
146  * we need some trick here.  If we have both high-performance rwlock and
147  * high performance and small-memory reference counters, we use rwlock for
148  * node lock and isc_refcount for node references.  In this case, we don't have
149  * to protect the access to the counters by locks.
150  * Otherwise, we simply use ordinary mutex lock for node locking, and use
151  * simple integers as reference counters which is protected by the lock.
152  * In most cases, we can simply use wrapper macros such as NODE_LOCK and
153  * NODE_UNLOCK.  In some other cases, however, we need to protect reference
154  * counters first and then protect other parts of a node as read-only data.
155  * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
156  * provided for these special cases.  When we can use the efficient backend
157  * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
158  * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
159  * section including the access to the reference counter.
160  * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
161  * section is also protected by NODE_STRONGLOCK().
162  */
163 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
164 typedef isc_rwlock_t nodelock_t;
165
166 #define NODE_INITLOCK(l)        isc_rwlock_init((l), 0, 0)
167 #define NODE_DESTROYLOCK(l)     isc_rwlock_destroy(l)
168 #define NODE_LOCK(l, t)         RWLOCK((l), (t))
169 #define NODE_UNLOCK(l, t)       RWUNLOCK((l), (t))
170 #define NODE_TRYUPGRADE(l)      isc_rwlock_tryupgrade(l)
171
172 #define NODE_STRONGLOCK(l)      ((void)0)
173 #define NODE_STRONGUNLOCK(l)    ((void)0)
174 #define NODE_WEAKLOCK(l, t)     NODE_LOCK(l, t)
175 #define NODE_WEAKUNLOCK(l, t)   NODE_UNLOCK(l, t)
176 #define NODE_WEAKDOWNGRADE(l)   isc_rwlock_downgrade(l)
177 #else
178 typedef isc_mutex_t nodelock_t;
179
180 #define NODE_INITLOCK(l)        isc_mutex_init(l)
181 #define NODE_DESTROYLOCK(l)     DESTROYLOCK(l)
182 #define NODE_LOCK(l, t)         LOCK(l)
183 #define NODE_UNLOCK(l, t)       UNLOCK(l)
184 #define NODE_TRYUPGRADE(l)      ISC_R_SUCCESS
185
186 #define NODE_STRONGLOCK(l)      LOCK(l)
187 #define NODE_STRONGUNLOCK(l)    UNLOCK(l)
188 #define NODE_WEAKLOCK(l, t)     ((void)0)
189 #define NODE_WEAKUNLOCK(l, t)   ((void)0)
190 #define NODE_WEAKDOWNGRADE(l)   ((void)0)
191 #endif
192
193 /*%
194  * Whether to rate-limit updating the LRU to avoid possible thread contention.
195  * Our performance measurement has shown the cost is marginal, so it's defined
196  * to be 0 by default either with or without threads.
197  */
198 #ifndef DNS_RBTDB_LIMITLRUUPDATE
199 #define DNS_RBTDB_LIMITLRUUPDATE 0
200 #endif
201
202 /*
203  * Allow clients with a virtual time of up to 5 minutes in the past to see
204  * records that would otherwise have expired.
205  */
206 #define RBTDB_VIRTUAL 300
207
208 struct noqname {
209         dns_name_t      name;
210         void *          neg;
211         void *          negsig;
212         dns_rdatatype_t type;
213 };
214
215 typedef struct acachectl acachectl_t;
216
217 typedef struct rdatasetheader {
218         /*%
219          * Locked by the owning node's lock.
220          */
221         rbtdb_serial_t                  serial;
222         dns_ttl_t                       rdh_ttl;
223         rbtdb_rdatatype_t               type;
224         isc_uint16_t                    attributes;
225         dns_trust_t                     trust;
226         struct noqname                  *noqname;
227         struct noqname                  *closest;
228         /*%<
229          * We don't use the LIST macros, because the LIST structure has
230          * both head and tail pointers, and is doubly linked.
231          */
232
233         struct rdatasetheader           *next;
234         /*%<
235          * If this is the top header for an rdataset, 'next' points
236          * to the top header for the next rdataset (i.e., the next type).
237          * Otherwise, it points up to the header whose down pointer points
238          * at this header.
239          */
240
241         struct rdatasetheader           *down;
242         /*%<
243          * Points to the header for the next older version of
244          * this rdataset.
245          */
246
247         isc_uint32_t                    count;
248         /*%<
249          * Monotonically increased every time this rdataset is bound so that
250          * it is used as the base of the starting point in DNS responses
251          * when the "cyclic" rrset-order is required.  Since the ordering
252          * should not be so crucial, no lock is set for the counter for
253          * performance reasons.
254          */
255
256         acachectl_t                     *additional_auth;
257         acachectl_t                     *additional_glue;
258
259         dns_rbtnode_t                   *node;
260         isc_stdtime_t                   last_used;
261         ISC_LINK(struct rdatasetheader) link;
262
263         unsigned int                    heap_index;
264         /*%<
265          * Used for TTL-based cache cleaning.
266          */
267         isc_stdtime_t                   resign;
268 } rdatasetheader_t;
269
270 typedef ISC_LIST(rdatasetheader_t)      rdatasetheaderlist_t;
271 typedef ISC_LIST(dns_rbtnode_t)         rbtnodelist_t;
272
273 #define RDATASET_ATTR_NONEXISTENT       0x0001
274 #define RDATASET_ATTR_STALE             0x0002
275 #define RDATASET_ATTR_IGNORE            0x0004
276 #define RDATASET_ATTR_RETAIN            0x0008
277 #define RDATASET_ATTR_NXDOMAIN          0x0010
278 #define RDATASET_ATTR_RESIGN            0x0020
279 #define RDATASET_ATTR_STATCOUNT         0x0040
280 #define RDATASET_ATTR_OPTOUT            0x0080
281
282 typedef struct acache_cbarg {
283         dns_rdatasetadditional_t        type;
284         unsigned int                    count;
285         dns_db_t                        *db;
286         dns_dbnode_t                    *node;
287         rdatasetheader_t                *header;
288 } acache_cbarg_t;
289
290 struct acachectl {
291         dns_acacheentry_t               *entry;
292         acache_cbarg_t                  *cbarg;
293 };
294
295 /*
296  * XXX
297  * When the cache will pre-expire data (due to memory low or other
298  * situations) before the rdataset's TTL has expired, it MUST
299  * respect the RETAIN bit and not expire the data until its TTL is
300  * expired.
301  */
302
303 #undef IGNORE                   /* WIN32 winbase.h defines this. */
304
305 #define EXISTS(header) \
306         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
307 #define NONEXISTENT(header) \
308         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
309 #define IGNORE(header) \
310         (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
311 #define RETAIN(header) \
312         (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
313 #define NXDOMAIN(header) \
314         (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
315 #define RESIGN(header) \
316         (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
317 #define OPTOUT(header) \
318         (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
319
320 #define DEFAULT_NODE_LOCK_COUNT         7       /*%< Should be prime. */
321
322 /*%
323  * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
324  * There is a tradeoff issue about configuring this value: if this is too
325  * small, it may cause heavier contention between threads; if this is too large,
326  * LRU purge algorithm won't work well (entries tend to be purged prematurely).
327  * The default value should work well for most environments, but this can
328  * also be configurable at compilation time via the
329  * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable.  This value must be larger than
330  * 1 due to the assumption of overmem_purge().
331  */
332 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
333 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
334 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
335 #else
336 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
337 #endif
338 #else
339 #define DEFAULT_CACHE_NODE_LOCK_COUNT   16
340 #endif  /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
341
342 typedef struct {
343         nodelock_t                      lock;
344         /* Protected in the refcount routines. */
345         isc_refcount_t                  references;
346         /* Locked by lock. */
347         isc_boolean_t                   exiting;
348 } rbtdb_nodelock_t;
349
350 typedef struct rbtdb_changed {
351         dns_rbtnode_t *                 node;
352         isc_boolean_t                   dirty;
353         ISC_LINK(struct rbtdb_changed)  link;
354 } rbtdb_changed_t;
355
356 typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
357
358 typedef enum {
359         dns_db_insecure,
360         dns_db_partial,
361         dns_db_secure
362 } dns_db_secure_t;
363
364 typedef struct rbtdb_version {
365         /* Not locked */
366         rbtdb_serial_t                  serial;
367         /*
368          * Protected in the refcount routines.
369          * XXXJT: should we change the lock policy based on the refcount
370          * performance?
371          */
372         isc_refcount_t                  references;
373         /* Locked by database lock. */
374         isc_boolean_t                   writer;
375         isc_boolean_t                   commit_ok;
376         rbtdb_changedlist_t             changed_list;
377         rdatasetheaderlist_t            resigned_list;
378         ISC_LINK(struct rbtdb_version)  link;
379         dns_db_secure_t                 secure;
380         isc_boolean_t                   havensec3;
381         /* NSEC3 parameters */
382         dns_hash_t                      hash;
383         isc_uint8_t                     flags;
384         isc_uint16_t                    iterations;
385         isc_uint8_t                     salt_length;
386         unsigned char                   salt[DNS_NSEC3_SALTSIZE];
387 } rbtdb_version_t;
388
389 typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;
390
391 typedef struct {
392         /* Unlocked. */
393         dns_db_t                        common;
394 #if DNS_RBTDB_USERWLOCK
395         isc_rwlock_t                    lock;
396 #else
397         isc_mutex_t                     lock;
398 #endif
399         isc_rwlock_t                    tree_lock;
400         unsigned int                    node_lock_count;
401         rbtdb_nodelock_t *              node_locks;
402         dns_rbtnode_t *                 origin_node;
403         dns_stats_t *                   rrsetstats; /* cache DB only */
404         /* Locked by lock. */
405         unsigned int                    active;
406         isc_refcount_t                  references;
407         unsigned int                    attributes;
408         rbtdb_serial_t                  current_serial;
409         rbtdb_serial_t                  least_serial;
410         rbtdb_serial_t                  next_serial;
411         rbtdb_version_t *               current_version;
412         rbtdb_version_t *               future_version;
413         rbtdb_versionlist_t             open_versions;
414         isc_boolean_t                   overmem;
415         isc_task_t *                    task;
416         dns_dbnode_t                    *soanode;
417         dns_dbnode_t                    *nsnode;
418
419         /*
420          * This is a linked list used to implement the LRU cache.  There will
421          * be node_lock_count linked lists here.  Nodes in bucket 1 will be
422          * placed on the linked list rdatasets[1].
423          */
424         rdatasetheaderlist_t            *rdatasets;
425
426         /*%
427          * Temporary storage for stale cache nodes and dynamically deleted
428          * nodes that await being cleaned up.
429          */
430         rbtnodelist_t                   *deadnodes;
431
432         /*
433          * Heaps.  Each of these is used for TTL based expiry.
434          */
435         isc_heap_t                      **heaps;
436
437         /* Locked by tree_lock. */
438         dns_rbt_t *                     tree;
439         dns_rbt_t *                     nsec3;
440
441         /* Unlocked */
442         unsigned int                    quantum;
443 } dns_rbtdb_t;
444
445 #define RBTDB_ATTR_LOADED               0x01
446 #define RBTDB_ATTR_LOADING              0x02
447
448 /*%
449  * Search Context
450  */
451 typedef struct {
452         dns_rbtdb_t *           rbtdb;
453         rbtdb_version_t *       rbtversion;
454         rbtdb_serial_t          serial;
455         unsigned int            options;
456         dns_rbtnodechain_t      chain;
457         isc_boolean_t           copy_name;
458         isc_boolean_t           need_cleanup;
459         isc_boolean_t           wild;
460         dns_rbtnode_t *         zonecut;
461         rdatasetheader_t *      zonecut_rdataset;
462         rdatasetheader_t *      zonecut_sigrdataset;
463         dns_fixedname_t         zonecut_name;
464         isc_stdtime_t           now;
465 } rbtdb_search_t;
466
467 /*%
468  * Load Context
469  */
470 typedef struct {
471         dns_rbtdb_t *           rbtdb;
472         isc_stdtime_t           now;
473 } rbtdb_load_t;
474
475 static void rdataset_disassociate(dns_rdataset_t *rdataset);
476 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
477 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
478 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
479 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
480 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
481 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
482                                         dns_name_t *name,
483                                         dns_rdataset_t *neg,
484                                         dns_rdataset_t *negsig);
485 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
486                                         dns_name_t *name,
487                                         dns_rdataset_t *neg,
488                                         dns_rdataset_t *negsig);
489 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
490                                            dns_rdatasetadditional_t type,
491                                            dns_rdatatype_t qtype,
492                                            dns_acache_t *acache,
493                                            dns_zone_t **zonep,
494                                            dns_db_t **dbp,
495                                            dns_dbversion_t **versionp,
496                                            dns_dbnode_t **nodep,
497                                            dns_name_t *fname,
498                                            dns_message_t *msg,
499                                            isc_stdtime_t now);
500 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
501                                            dns_rdatasetadditional_t type,
502                                            dns_rdatatype_t qtype,
503                                            dns_acache_t *acache,
504                                            dns_zone_t *zone,
505                                            dns_db_t *db,
506                                            dns_dbversion_t *version,
507                                            dns_dbnode_t *node,
508                                            dns_name_t *fname);
509 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
510                                            dns_rdataset_t *rdataset,
511                                            dns_rdatasetadditional_t type,
512                                            dns_rdatatype_t qtype);
513 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
514                                               isc_stdtime_t now);
515 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
516                           isc_stdtime_t now);
517 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
518                           isc_boolean_t tree_locked);
519 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
520                           isc_stdtime_t now, isc_boolean_t tree_locked);
521 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
522                                   rdatasetheader_t *newheader);
523 static void prune_tree(isc_task_t *task, isc_event_t *event);
524 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
525 static void rdataset_expire(dns_rdataset_t *rdataset);
526
527 static dns_rdatasetmethods_t rdataset_methods = {
528         rdataset_disassociate,
529         rdataset_first,
530         rdataset_next,
531         rdataset_current,
532         rdataset_clone,
533         rdataset_count,
534         NULL,
535         rdataset_getnoqname,
536         NULL,
537         rdataset_getclosest,
538         rdataset_getadditional,
539         rdataset_setadditional,
540         rdataset_putadditional,
541         rdataset_settrust,
542         rdataset_expire
543 };
544
545 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
546 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
547 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
548 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
549                                  dns_rdataset_t *rdataset);
550
551 static dns_rdatasetitermethods_t rdatasetiter_methods = {
552         rdatasetiter_destroy,
553         rdatasetiter_first,
554         rdatasetiter_next,
555         rdatasetiter_current
556 };
557
558 typedef struct rbtdb_rdatasetiter {
559         dns_rdatasetiter_t              common;
560         rdatasetheader_t *              current;
561 } rbtdb_rdatasetiter_t;
562
563 static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
564 static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
565 static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
566 static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
567                                         dns_name_t *name);
568 static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
569 static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
570 static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
571                                            dns_dbnode_t **nodep,
572                                            dns_name_t *name);
573 static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
574 static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
575                                           dns_name_t *name);
576
577 static dns_dbiteratormethods_t dbiterator_methods = {
578         dbiterator_destroy,
579         dbiterator_first,
580         dbiterator_last,
581         dbiterator_seek,
582         dbiterator_prev,
583         dbiterator_next,
584         dbiterator_current,
585         dbiterator_pause,
586         dbiterator_origin
587 };
588
589 #define DELETION_BATCH_MAX 64
590
591 /*
592  * If 'paused' is ISC_TRUE, then the tree lock is not being held.
593  */
594 typedef struct rbtdb_dbiterator {
595         dns_dbiterator_t                common;
596         isc_boolean_t                   paused;
597         isc_boolean_t                   new_origin;
598         isc_rwlocktype_t                tree_locked;
599         isc_result_t                    result;
600         dns_fixedname_t                 name;
601         dns_fixedname_t                 origin;
602         dns_rbtnodechain_t              chain;
603         dns_rbtnodechain_t              nsec3chain;
604         dns_rbtnodechain_t              *current;
605         dns_rbtnode_t                   *node;
606         dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX];
607         int                             delete;
608         isc_boolean_t                   nsec3only;
609         isc_boolean_t                   nonsec3;
610 } rbtdb_dbiterator_t;
611
612
613 #define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
614 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
615
616 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
617                        isc_event_t *event);
618 static void overmem(dns_db_t *db, isc_boolean_t overmem);
619 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
620                                isc_boolean_t *nsec3createflag);
621
622 /*%
623  * 'init_count' is used to initialize 'newheader->count' which in turn
624  * is used to determine where in the cycle rrset-order cyclic starts.
625  * We don't lock this as we don't care about simultaneous updates.
626  *
627  * Note:
628  *      Both init_count and header->count can be ISC_UINT32_MAX.
629  *      The count on the returned rdataset however can't be as
630  *      that indicates that the database does not implement cyclic
631  *      processing.
632  */
633 static unsigned int init_count;
634
635 /*
636  * Locking
637  *
638  * If a routine is going to lock more than one lock in this module, then
639  * the locking must be done in the following order:
640  *
641  *      Tree Lock
642  *
643  *      Node Lock       (Only one from the set may be locked at one time by
644  *                       any caller)
645  *
646  *      Database Lock
647  *
648  * Failure to follow this hierarchy can result in deadlock.
649  */
650
651 /*
652  * Deleting Nodes
653  *
654  * For zone databases the node for the origin of the zone MUST NOT be deleted.
655  */
656
657
658 /*
659  * DB Routines
660  */
661
662 static void
663 attach(dns_db_t *source, dns_db_t **targetp) {
664         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
665
666         REQUIRE(VALID_RBTDB(rbtdb));
667
668         isc_refcount_increment(&rbtdb->references, NULL);
669
670         *targetp = source;
671 }
672
673 static void
674 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
675         dns_rbtdb_t *rbtdb = event->ev_arg;
676
677         UNUSED(task);
678
679         free_rbtdb(rbtdb, ISC_TRUE, event);
680 }
681
682 static void
683 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
684                   isc_boolean_t increment)
685 {
686         dns_rdatastatstype_t statattributes = 0;
687         dns_rdatastatstype_t base = 0;
688         dns_rdatastatstype_t type;
689
690         /* At the moment we count statistics only for cache DB */
691         INSIST(IS_CACHE(rbtdb));
692
693         if (NXDOMAIN(header))
694                 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
695         else if (RBTDB_RDATATYPE_BASE(header->type) == 0) {
696                 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
697                 base = RBTDB_RDATATYPE_EXT(header->type);
698         } else
699                 base = RBTDB_RDATATYPE_BASE(header->type);
700
701         type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
702         if (increment)
703                 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
704         else
705                 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
706 }
707
708 static void
709 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
710         int idx;
711         isc_heap_t *heap;
712         dns_ttl_t oldttl;
713
714         oldttl = header->rdh_ttl;
715         header->rdh_ttl = newttl;
716
717         if (!IS_CACHE(rbtdb))
718                 return;
719
720         /*
721          * It's possible the rbtdb is not a cache.  If this is the case,
722          * we will not have a heap, and we move on.  If we do, though,
723          * we might need to adjust things.
724          */
725         if (header->heap_index == 0 || newttl == oldttl)
726                 return;
727         idx = header->node->locknum;
728         if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
729             return;
730         heap = rbtdb->heaps[idx];
731
732         if (newttl < oldttl)
733                 isc_heap_increased(heap, header->heap_index);
734         else
735                 isc_heap_decreased(heap, header->heap_index);
736 }
737
738 /*%
739  * These functions allow the heap code to rank the priority of each
740  * element.  It returns ISC_TRUE if v1 happens "sooner" than v2.
741  */
742 static isc_boolean_t
743 ttl_sooner(void *v1, void *v2) {
744         rdatasetheader_t *h1 = v1;
745         rdatasetheader_t *h2 = v2;
746
747         if (h1->rdh_ttl < h2->rdh_ttl)
748                 return (ISC_TRUE);
749         return (ISC_FALSE);
750 }
751
752 static isc_boolean_t
753 resign_sooner(void *v1, void *v2) {
754         rdatasetheader_t *h1 = v1;
755         rdatasetheader_t *h2 = v2;
756
757         if (h1->resign < h2->resign)
758                 return (ISC_TRUE);
759         return (ISC_FALSE);
760 }
761
762 /*%
763  * This function sets the heap index into the header.
764  */
765 static void
766 set_index(void *what, unsigned int index) {
767         rdatasetheader_t *h = what;
768
769         h->heap_index = index;
770 }
771
772 /*%
773  * Work out how many nodes can be deleted in the time between two
774  * requests to the nameserver.  Smooth the resulting number and use it
775  * as a estimate for the number of nodes to be deleted in the next
776  * iteration.
777  */
778 static unsigned int
779 adjust_quantum(unsigned int old, isc_time_t *start) {
780         unsigned int pps = dns_pps;     /* packets per second */
781         unsigned int interval;
782         isc_uint64_t usecs;
783         isc_time_t end;
784         unsigned int new;
785
786         if (pps < 100)
787                 pps = 100;
788         isc_time_now(&end);
789
790         interval = 1000000 / pps;       /* interval in usec */
791         if (interval == 0)
792                 interval = 1;
793         usecs = isc_time_microdiff(&end, start);
794         if (usecs == 0) {
795                 /*
796                  * We were unable to measure the amount of time taken.
797                  * Double the nodes deleted next time.
798                  */
799                 old *= 2;
800                 if (old > 1000)
801                         old = 1000;
802                 return (old);
803         }
804         new = old * interval;
805         new /= (unsigned int)usecs;
806         if (new == 0)
807                 new = 1;
808         else if (new > 1000)
809                 new = 1000;
810
811         /* Smooth */
812         new = (new + old * 3) / 4;
813
814         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
815                       ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
816
817         return (new);
818 }
819
820 static void
821 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
822         unsigned int i;
823         isc_ondestroy_t ondest;
824         isc_result_t result;
825         char buf[DNS_NAME_FORMATSIZE];
826         isc_time_t start;
827
828         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
829                 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
830
831         REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
832         REQUIRE(rbtdb->future_version == NULL);
833
834         if (rbtdb->current_version != NULL) {
835                 unsigned int refs;
836
837                 isc_refcount_decrement(&rbtdb->current_version->references,
838                                        &refs);
839                 INSIST(refs == 0);
840                 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
841                 isc_refcount_destroy(&rbtdb->current_version->references);
842                 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
843                             sizeof(rbtdb_version_t));
844         }
845
846         /*
847          * We assume the number of remaining dead nodes is reasonably small;
848          * the overhead of unlinking all nodes here should be negligible.
849          */
850         for (i = 0; i < rbtdb->node_lock_count; i++) {
851                 dns_rbtnode_t *node;
852
853                 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
854                 while (node != NULL) {
855                         ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
856                         node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
857                 }
858         }
859
860         if (event == NULL)
861                 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
862  again:
863         if (rbtdb->tree != NULL) {
864                 isc_time_now(&start);
865                 result = dns_rbt_destroy2(&rbtdb->tree, rbtdb->quantum);
866                 if (result == ISC_R_QUOTA) {
867                         INSIST(rbtdb->task != NULL);
868                         if (rbtdb->quantum != 0)
869                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
870                                                                 &start);
871                         if (event == NULL)
872                                 event = isc_event_allocate(rbtdb->common.mctx,
873                                                            NULL,
874                                                          DNS_EVENT_FREESTORAGE,
875                                                            free_rbtdb_callback,
876                                                            rbtdb,
877                                                            sizeof(isc_event_t));
878                         if (event == NULL)
879                                 goto again;
880                         isc_task_send(rbtdb->task, &event);
881                         return;
882                 }
883                 INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
884         }
885
886         if (rbtdb->nsec3 != NULL) {
887                 isc_time_now(&start);
888                 result = dns_rbt_destroy2(&rbtdb->nsec3, rbtdb->quantum);
889                 if (result == ISC_R_QUOTA) {
890                         INSIST(rbtdb->task != NULL);
891                         if (rbtdb->quantum != 0)
892                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
893                                                                 &start);
894                         if (event == NULL)
895                                 event = isc_event_allocate(rbtdb->common.mctx,
896                                                            NULL,
897                                                          DNS_EVENT_FREESTORAGE,
898                                                            free_rbtdb_callback,
899                                                            rbtdb,
900                                                            sizeof(isc_event_t));
901                         if (event == NULL)
902                                 goto again;
903                         isc_task_send(rbtdb->task, &event);
904                         return;
905                 }
906                 INSIST(result == ISC_R_SUCCESS && rbtdb->nsec3 == NULL);
907         }
908
909         if (event != NULL)
910                 isc_event_free(&event);
911         if (log) {
912                 if (dns_name_dynamic(&rbtdb->common.origin))
913                         dns_name_format(&rbtdb->common.origin, buf,
914                                         sizeof(buf));
915                 else
916                         strcpy(buf, "<UNKNOWN>");
917                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
918                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
919                               "done free_rbtdb(%s)", buf);
920         }
921         if (dns_name_dynamic(&rbtdb->common.origin))
922                 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
923         for (i = 0; i < rbtdb->node_lock_count; i++) {
924                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
925                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
926         }
927
928         /*
929          * Clean up LRU / re-signing order lists.
930          */
931         if (rbtdb->rdatasets != NULL) {
932                 for (i = 0; i < rbtdb->node_lock_count; i++)
933                         INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
934                 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
935                             rbtdb->node_lock_count *
936                             sizeof(rdatasetheaderlist_t));
937         }
938         /*
939          * Clean up dead node buckets.
940          */
941         if (rbtdb->deadnodes != NULL) {
942                 for (i = 0; i < rbtdb->node_lock_count; i++)
943                         INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
944                 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
945                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
946         }
947         /*
948          * Clean up heap objects.
949          */
950         if (rbtdb->heaps != NULL) {
951                 for (i = 0; i < rbtdb->node_lock_count; i++)
952                         isc_heap_destroy(&rbtdb->heaps[i]);
953                 isc_mem_put(rbtdb->common.mctx, rbtdb->heaps,
954                             rbtdb->node_lock_count *
955                             sizeof(isc_heap_t *));
956         }
957
958         if (rbtdb->rrsetstats != NULL)
959                 dns_stats_detach(&rbtdb->rrsetstats);
960
961         isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
962                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
963         isc_rwlock_destroy(&rbtdb->tree_lock);
964         isc_refcount_destroy(&rbtdb->references);
965         if (rbtdb->task != NULL)
966                 isc_task_detach(&rbtdb->task);
967
968         RBTDB_DESTROYLOCK(&rbtdb->lock);
969         rbtdb->common.magic = 0;
970         rbtdb->common.impmagic = 0;
971         ondest = rbtdb->common.ondest;
972         isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
973         isc_ondestroy_notify(&ondest, rbtdb);
974 }
975
976 static inline void
977 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
978         isc_boolean_t want_free = ISC_FALSE;
979         unsigned int i;
980         unsigned int inactive = 0;
981
982         /* XXX check for open versions here */
983
984         if (rbtdb->soanode != NULL)
985                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
986         if (rbtdb->nsnode != NULL)
987                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
988
989         /*
990          * Even though there are no external direct references, there still
991          * may be nodes in use.
992          */
993         for (i = 0; i < rbtdb->node_lock_count; i++) {
994                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
995                 rbtdb->node_locks[i].exiting = ISC_TRUE;
996                 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
997                 if (isc_refcount_current(&rbtdb->node_locks[i].references)
998                     == 0) {
999                         inactive++;
1000                 }
1001         }
1002
1003         if (inactive != 0) {
1004                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1005                 rbtdb->active -= inactive;
1006                 if (rbtdb->active == 0)
1007                         want_free = ISC_TRUE;
1008                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1009                 if (want_free) {
1010                         char buf[DNS_NAME_FORMATSIZE];
1011                         if (dns_name_dynamic(&rbtdb->common.origin))
1012                                 dns_name_format(&rbtdb->common.origin, buf,
1013                                                 sizeof(buf));
1014                         else
1015                                 strcpy(buf, "<UNKNOWN>");
1016                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1017                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1018                                       "calling free_rbtdb(%s)", buf);
1019                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
1020                 }
1021         }
1022 }
1023
1024 static void
1025 detach(dns_db_t **dbp) {
1026         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1027         unsigned int refs;
1028
1029         REQUIRE(VALID_RBTDB(rbtdb));
1030
1031         isc_refcount_decrement(&rbtdb->references, &refs);
1032
1033         if (refs == 0)
1034                 maybe_free_rbtdb(rbtdb);
1035
1036         *dbp = NULL;
1037 }
1038
1039 static void
1040 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1041         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1042         rbtdb_version_t *version;
1043         unsigned int refs;
1044
1045         REQUIRE(VALID_RBTDB(rbtdb));
1046
1047         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1048         version = rbtdb->current_version;
1049         isc_refcount_increment(&version->references, &refs);
1050         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1051
1052         *versionp = (dns_dbversion_t *)version;
1053 }
1054
1055 static inline rbtdb_version_t *
1056 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1057                  unsigned int references, isc_boolean_t writer)
1058 {
1059         isc_result_t result;
1060         rbtdb_version_t *version;
1061
1062         version = isc_mem_get(mctx, sizeof(*version));
1063         if (version == NULL)
1064                 return (NULL);
1065         version->serial = serial;
1066         result = isc_refcount_init(&version->references, references);
1067         if (result != ISC_R_SUCCESS) {
1068                 isc_mem_put(mctx, version, sizeof(*version));
1069                 return (NULL);
1070         }
1071         version->writer = writer;
1072         version->commit_ok = ISC_FALSE;
1073         ISC_LIST_INIT(version->changed_list);
1074         ISC_LIST_INIT(version->resigned_list);
1075         ISC_LINK_INIT(version, link);
1076
1077         return (version);
1078 }
1079
1080 static isc_result_t
1081 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1082         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1083         rbtdb_version_t *version;
1084
1085         REQUIRE(VALID_RBTDB(rbtdb));
1086         REQUIRE(versionp != NULL && *versionp == NULL);
1087         REQUIRE(rbtdb->future_version == NULL);
1088
1089         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1090         RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
1091         version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1092                                    ISC_TRUE);
1093         if (version != NULL) {
1094                 version->commit_ok = ISC_TRUE;
1095                 version->secure = rbtdb->current_version->secure;
1096                 version->havensec3 = rbtdb->current_version->havensec3;
1097                 if (version->havensec3) {
1098                         version->flags = rbtdb->current_version->flags;
1099                         version->iterations =
1100                                 rbtdb->current_version->iterations;
1101                         version->hash = rbtdb->current_version->hash;
1102                         version->salt_length =
1103                                 rbtdb->current_version->salt_length;
1104                         memcpy(version->salt, rbtdb->current_version->salt,
1105                                version->salt_length);
1106                 } else {
1107                         version->flags = 0;
1108                         version->iterations = 0;
1109                         version->hash = 0;
1110                         version->salt_length = 0;
1111                         memset(version->salt, 0, sizeof(version->salt));
1112                 }
1113                 rbtdb->next_serial++;
1114                 rbtdb->future_version = version;
1115         }
1116         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1117
1118         if (version == NULL)
1119                 return (ISC_R_NOMEMORY);
1120
1121         *versionp = version;
1122
1123         return (ISC_R_SUCCESS);
1124 }
1125
1126 static void
1127 attachversion(dns_db_t *db, dns_dbversion_t *source,
1128               dns_dbversion_t **targetp)
1129 {
1130         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1131         rbtdb_version_t *rbtversion = source;
1132         unsigned int refs;
1133
1134         REQUIRE(VALID_RBTDB(rbtdb));
1135
1136         isc_refcount_increment(&rbtversion->references, &refs);
1137         INSIST(refs > 1);
1138
1139         *targetp = rbtversion;
1140 }
1141
1142 static rbtdb_changed_t *
1143 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1144             dns_rbtnode_t *node)
1145 {
1146         rbtdb_changed_t *changed;
1147         unsigned int refs;
1148
1149         /*
1150          * Caller must be holding the node lock if its reference must be
1151          * protected by the lock.
1152          */
1153
1154         changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1155
1156         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1157
1158         REQUIRE(version->writer);
1159
1160         if (changed != NULL) {
1161                 dns_rbtnode_refincrement(node, &refs);
1162                 INSIST(refs != 0);
1163                 changed->node = node;
1164                 changed->dirty = ISC_FALSE;
1165                 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1166         } else
1167                 version->commit_ok = ISC_FALSE;
1168
1169         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1170
1171         return (changed);
1172 }
1173
1174 static void
1175 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1176                  acachectl_t *array)
1177 {
1178         unsigned int count;
1179         unsigned int i;
1180         unsigned char *raw;     /* RDATASLAB */
1181
1182         /*
1183          * The caller must be holding the corresponding node lock.
1184          */
1185
1186         if (array == NULL)
1187                 return;
1188
1189         raw = (unsigned char *)header + sizeof(*header);
1190         count = raw[0] * 256 + raw[1];
1191
1192         /*
1193          * Sanity check: since an additional cache entry has a reference to
1194          * the original DB node (in the callback arg), there should be no
1195          * acache entries when the node can be freed.
1196          */
1197         for (i = 0; i < count; i++)
1198                 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1199
1200         isc_mem_put(mctx, array, count * sizeof(acachectl_t));
1201 }
1202
1203 static inline void
1204 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1205
1206         if (dns_name_dynamic(&(*noqname)->name))
1207                 dns_name_free(&(*noqname)->name, mctx);
1208         if ((*noqname)->neg != NULL)
1209                 isc_mem_put(mctx, (*noqname)->neg,
1210                             dns_rdataslab_size((*noqname)->neg, 0));
1211         if ((*noqname)->negsig != NULL)
1212                 isc_mem_put(mctx, (*noqname)->negsig,
1213                             dns_rdataslab_size((*noqname)->negsig, 0));
1214         isc_mem_put(mctx, *noqname, sizeof(**noqname));
1215         *noqname = NULL;
1216 }
1217
1218 static inline void
1219 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1220 {
1221         ISC_LINK_INIT(h, link);
1222         h->heap_index = 0;
1223
1224 #if TRACE_HEADER
1225         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1226                 fprintf(stderr, "initialized header: %p\n", h);
1227 #else
1228         UNUSED(rbtdb);
1229 #endif
1230 }
1231
1232 static inline rdatasetheader_t *
1233 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1234 {
1235         rdatasetheader_t *h;
1236
1237         h = isc_mem_get(mctx, sizeof(*h));
1238         if (h == NULL)
1239                 return (NULL);
1240
1241 #if TRACE_HEADER
1242         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1243                 fprintf(stderr, "allocated header: %p\n", h);
1244 #endif
1245         init_rdataset(rbtdb, h);
1246         return (h);
1247 }
1248
1249 static inline void
1250 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1251 {
1252         unsigned int size;
1253         int idx;
1254
1255         if (EXISTS(rdataset) &&
1256             (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1257                 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1258         }
1259
1260         idx = rdataset->node->locknum;
1261         if (ISC_LINK_LINKED(rdataset, link)) {
1262                 INSIST(IS_CACHE(rbtdb));
1263                 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1264         }
1265         if (rdataset->heap_index != 0)
1266                 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1267         rdataset->heap_index = 0;
1268
1269         if (rdataset->noqname != NULL)
1270                 free_noqname(mctx, &rdataset->noqname);
1271         if (rdataset->closest != NULL)
1272                 free_noqname(mctx, &rdataset->closest);
1273
1274         free_acachearray(mctx, rdataset, rdataset->additional_auth);
1275         free_acachearray(mctx, rdataset, rdataset->additional_glue);
1276
1277         if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1278                 size = sizeof(*rdataset);
1279         else
1280                 size = dns_rdataslab_size((unsigned char *)rdataset,
1281                                           sizeof(*rdataset));
1282         isc_mem_put(mctx, rdataset, size);
1283 }
1284
1285 static inline void
1286 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1287         rdatasetheader_t *header, *dcurrent;
1288         isc_boolean_t make_dirty = ISC_FALSE;
1289
1290         /*
1291          * Caller must hold the node lock.
1292          */
1293
1294         /*
1295          * We set the IGNORE attribute on rdatasets with serial number
1296          * 'serial'.  When the reference count goes to zero, these rdatasets
1297          * will be cleaned up; until that time, they will be ignored.
1298          */
1299         for (header = node->data; header != NULL; header = header->next) {
1300                 if (header->serial == serial) {
1301                         header->attributes |= RDATASET_ATTR_IGNORE;
1302                         make_dirty = ISC_TRUE;
1303                 }
1304                 for (dcurrent = header->down;
1305                      dcurrent != NULL;
1306                      dcurrent = dcurrent->down) {
1307                         if (dcurrent->serial == serial) {
1308                                 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1309                                 make_dirty = ISC_TRUE;
1310                         }
1311                 }
1312         }
1313         if (make_dirty)
1314                 node->dirty = 1;
1315 }
1316
1317 static inline void
1318 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1319 {
1320         rdatasetheader_t *d, *down_next;
1321
1322         for (d = top->down; d != NULL; d = down_next) {
1323                 down_next = d->down;
1324                 free_rdataset(rbtdb, mctx, d);
1325         }
1326         top->down = NULL;
1327 }
1328
1329 static inline void
1330 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1331         rdatasetheader_t *current, *top_prev, *top_next;
1332         isc_mem_t *mctx = rbtdb->common.mctx;
1333
1334         /*
1335          * Caller must be holding the node lock.
1336          */
1337
1338         top_prev = NULL;
1339         for (current = node->data; current != NULL; current = top_next) {
1340                 top_next = current->next;
1341                 clean_stale_headers(rbtdb, mctx, current);
1342                 /*
1343                  * If current is nonexistent or stale, we can clean it up.
1344                  */
1345                 if ((current->attributes &
1346                      (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1347                         if (top_prev != NULL)
1348                                 top_prev->next = current->next;
1349                         else
1350                                 node->data = current->next;
1351                         free_rdataset(rbtdb, mctx, current);
1352                 } else
1353                         top_prev = current;
1354         }
1355         node->dirty = 0;
1356 }
1357
1358 static inline void
1359 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1360                 rbtdb_serial_t least_serial)
1361 {
1362         rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1363         rdatasetheader_t *top_prev, *top_next;
1364         isc_mem_t *mctx = rbtdb->common.mctx;
1365         isc_boolean_t still_dirty = ISC_FALSE;
1366
1367         /*
1368          * Caller must be holding the node lock.
1369          */
1370         REQUIRE(least_serial != 0);
1371
1372         top_prev = NULL;
1373         for (current = node->data; current != NULL; current = top_next) {
1374                 top_next = current->next;
1375
1376                 /*
1377                  * First, we clean up any instances of multiple rdatasets
1378                  * with the same serial number, or that have the IGNORE
1379                  * attribute.
1380                  */
1381                 dparent = current;
1382                 for (dcurrent = current->down;
1383                      dcurrent != NULL;
1384                      dcurrent = down_next) {
1385                         down_next = dcurrent->down;
1386                         INSIST(dcurrent->serial <= dparent->serial);
1387                         if (dcurrent->serial == dparent->serial ||
1388                             IGNORE(dcurrent)) {
1389                                 if (down_next != NULL)
1390                                         down_next->next = dparent;
1391                                 dparent->down = down_next;
1392                                 free_rdataset(rbtdb, mctx, dcurrent);
1393                         } else
1394                                 dparent = dcurrent;
1395                 }
1396
1397                 /*
1398                  * We've now eliminated all IGNORE datasets with the possible
1399                  * exception of current, which we now check.
1400                  */
1401                 if (IGNORE(current)) {
1402                         down_next = current->down;
1403                         if (down_next == NULL) {
1404                                 if (top_prev != NULL)
1405                                         top_prev->next = current->next;
1406                                 else
1407                                         node->data = current->next;
1408                                 free_rdataset(rbtdb, mctx, current);
1409                                 /*
1410                                  * current no longer exists, so we can
1411                                  * just continue with the loop.
1412                                  */
1413                                 continue;
1414                         } else {
1415                                 /*
1416                                  * Pull up current->down, making it the new
1417                                  * current.
1418                                  */
1419                                 if (top_prev != NULL)
1420                                         top_prev->next = down_next;
1421                                 else
1422                                         node->data = down_next;
1423                                 down_next->next = top_next;
1424                                 free_rdataset(rbtdb, mctx, current);
1425                                 current = down_next;
1426                         }
1427                 }
1428
1429                 /*
1430                  * We now try to find the first down node less than the
1431                  * least serial.
1432                  */
1433                 dparent = current;
1434                 for (dcurrent = current->down;
1435                      dcurrent != NULL;
1436                      dcurrent = down_next) {
1437                         down_next = dcurrent->down;
1438                         if (dcurrent->serial < least_serial)
1439                                 break;
1440                         dparent = dcurrent;
1441                 }
1442
1443                 /*
1444                  * If there is a such an rdataset, delete it and any older
1445                  * versions.
1446                  */
1447                 if (dcurrent != NULL) {
1448                         do {
1449                                 down_next = dcurrent->down;
1450                                 INSIST(dcurrent->serial <= least_serial);
1451                                 free_rdataset(rbtdb, mctx, dcurrent);
1452                                 dcurrent = down_next;
1453                         } while (dcurrent != NULL);
1454                         dparent->down = NULL;
1455                 }
1456
1457                 /*
1458                  * Note.  The serial number of 'current' might be less than
1459                  * least_serial too, but we cannot delete it because it is
1460                  * the most recent version, unless it is a NONEXISTENT
1461                  * rdataset.
1462                  */
1463                 if (current->down != NULL) {
1464                         still_dirty = ISC_TRUE;
1465                         top_prev = current;
1466                 } else {
1467                         /*
1468                          * If this is a NONEXISTENT rdataset, we can delete it.
1469                          */
1470                         if (NONEXISTENT(current)) {
1471                                 if (top_prev != NULL)
1472                                         top_prev->next = current->next;
1473                                 else
1474                                         node->data = current->next;
1475                                 free_rdataset(rbtdb, mctx, current);
1476                         } else
1477                                 top_prev = current;
1478                 }
1479         }
1480         if (!still_dirty)
1481                 node->dirty = 0;
1482 }
1483
1484 /*%
1485  * Clean up dead nodes.  These are nodes which have no references, and
1486  * have no data.  They are dead but we could not or chose not to delete
1487  * them when we deleted all the data at that node because we did not want
1488  * to wait for the tree write lock.
1489  *
1490  * The caller must hold a tree write lock and bucketnum'th node (write) lock.
1491  */
1492 static void
1493 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1494         dns_rbtnode_t *node;
1495         isc_result_t result;
1496         int count = 10;         /* XXXJT: should be adjustable */
1497
1498         node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1499         while (node != NULL && count > 0) {
1500                 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1501
1502                 /*
1503                  * Since we're holding a tree write lock, it should be
1504                  * impossible for this node to be referenced by others.
1505                  */
1506                 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1507                        node->data == NULL);
1508
1509                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1510                 if (node->nsec3)
1511                         result = dns_rbt_deletenode(rbtdb->nsec3, node,
1512                                                     ISC_FALSE);
1513                 else
1514                         result = dns_rbt_deletenode(rbtdb->tree, node,
1515                                                     ISC_FALSE);
1516                 if (result != ISC_R_SUCCESS)
1517                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1518                                       DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1519                                       "cleanup_dead_nodes: "
1520                                       "dns_rbt_deletenode: %s",
1521                                       isc_result_totext(result));
1522                 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1523                 count--;
1524         }
1525 }
1526
1527 /*
1528  * Caller must be holding the node lock if its reference must be protected
1529  * by the lock.
1530  */
1531 static inline void
1532 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1533         unsigned int lockrefs, noderefs;
1534         isc_refcount_t *lockref;
1535
1536         dns_rbtnode_refincrement0(node, &noderefs);
1537         if (noderefs == 1) {    /* this is the first reference to the node */
1538                 lockref = &rbtdb->node_locks[node->locknum].references;
1539                 isc_refcount_increment0(lockref, &lockrefs);
1540                 INSIST(lockrefs != 0);
1541         }
1542         INSIST(noderefs != 0);
1543 }
1544
1545 /*
1546  * This function is assumed to be called when a node is newly referenced
1547  * and can be in the deadnode list.  In that case the node must be retrieved
1548  * from the list because it is going to be used.  In addition, if the caller
1549  * happens to hold a write lock on the tree, it's a good chance to purge dead
1550  * nodes.
1551  * Note: while a new reference is gained in multiple places, there are only very
1552  * few cases where the node can be in the deadnode list (only empty nodes can
1553  * have been added to the list).
1554  */
1555 static inline void
1556 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1557                 isc_rwlocktype_t treelocktype)
1558 {
1559         isc_boolean_t need_relock = ISC_FALSE;
1560
1561         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1562         new_reference(rbtdb, node);
1563
1564         NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1565                       isc_rwlocktype_read);
1566         if (ISC_LINK_LINKED(node, deadlink))
1567                 need_relock = ISC_TRUE;
1568         else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1569                  treelocktype == isc_rwlocktype_write)
1570                 need_relock = ISC_TRUE;
1571         NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1572                         isc_rwlocktype_read);
1573         if (need_relock) {
1574                 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1575                               isc_rwlocktype_write);
1576                 if (ISC_LINK_LINKED(node, deadlink))
1577                         ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1578                                         node, deadlink);
1579                 if (treelocktype == isc_rwlocktype_write)
1580                         cleanup_dead_nodes(rbtdb, node->locknum);
1581                 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1582                                 isc_rwlocktype_write);
1583         }
1584
1585         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1586 }
1587
1588 /*
1589  * Caller must be holding the node lock; either the "strong", read or write
1590  * lock.  Note that the lock must be held even when node references are
1591  * atomically modified; in that case the decrement operation itself does not
1592  * have to be protected, but we must avoid a race condition where multiple
1593  * threads are decreasing the reference to zero simultaneously and at least
1594  * one of them is going to free the node.
1595  * This function returns ISC_TRUE if and only if the node reference decreases
1596  * to zero.
1597  */
1598 static isc_boolean_t
1599 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1600                     rbtdb_serial_t least_serial,
1601                     isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1602                     isc_boolean_t pruning)
1603 {
1604         isc_result_t result;
1605         isc_boolean_t write_locked;
1606         rbtdb_nodelock_t *nodelock;
1607         unsigned int refs, nrefs;
1608         int bucket = node->locknum;
1609         isc_boolean_t no_reference;
1610
1611         nodelock = &rbtdb->node_locks[bucket];
1612
1613         /* Handle easy and typical case first. */
1614         if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1615                 dns_rbtnode_refdecrement(node, &nrefs);
1616                 INSIST((int)nrefs >= 0);
1617                 if (nrefs == 0) {
1618                         isc_refcount_decrement(&nodelock->references, &refs);
1619                         INSIST((int)refs >= 0);
1620                 }
1621                 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1622         }
1623
1624         /* Upgrade the lock? */
1625         if (nlock == isc_rwlocktype_read) {
1626                 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1627                 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1628         }
1629         dns_rbtnode_refdecrement(node, &nrefs);
1630         INSIST((int)nrefs >= 0);
1631         if (nrefs > 0) {
1632                 /* Restore the lock? */
1633                 if (nlock == isc_rwlocktype_read)
1634                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1635                 return (ISC_FALSE);
1636         }
1637
1638         if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1639                 if (IS_CACHE(rbtdb))
1640                         clean_cache_node(rbtdb, node);
1641                 else {
1642                         if (least_serial == 0) {
1643                                 /*
1644                                  * Caller doesn't know the least serial.
1645                                  * Get it.
1646                                  */
1647                                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1648                                 least_serial = rbtdb->least_serial;
1649                                 RBTDB_UNLOCK(&rbtdb->lock,
1650                                              isc_rwlocktype_read);
1651                         }
1652                         clean_zone_node(rbtdb, node, least_serial);
1653                 }
1654         }
1655
1656         isc_refcount_decrement(&nodelock->references, &refs);
1657         INSIST((int)refs >= 0);
1658
1659         /*
1660          * XXXDCL should this only be done for cache zones?
1661          */
1662         if (node->data != NULL || node->down != NULL) {
1663                 /* Restore the lock? */
1664                 if (nlock == isc_rwlocktype_read)
1665                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1666                 return (ISC_TRUE);
1667         }
1668
1669         /*
1670          * Attempt to switch to a write lock on the tree.  If this fails,
1671          * we will add this node to a linked list of nodes in this locking
1672          * bucket which we will free later.
1673          */
1674         if (tlock != isc_rwlocktype_write) {
1675                 /*
1676                  * Locking hierarchy notwithstanding, we don't need to free
1677                  * the node lock before acquiring the tree write lock because
1678                  * we only do a trylock.
1679                  */
1680                 if (tlock == isc_rwlocktype_read)
1681                         result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1682                 else
1683                         result = isc_rwlock_trylock(&rbtdb->tree_lock,
1684                                                     isc_rwlocktype_write);
1685                 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1686                               result == ISC_R_LOCKBUSY);
1687
1688                 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1689         } else
1690                 write_locked = ISC_TRUE;
1691
1692         no_reference = ISC_TRUE;
1693         if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1694                 /*
1695                  * We can now delete the node if the reference counter is
1696                  * zero.  This should be typically the case, but a different
1697                  * thread may still gain a (new) reference just before the
1698                  * current thread locks the tree (e.g., in findnode()).
1699                  */
1700
1701                 /*
1702                  * If this node is the only one in the level it's in, deleting
1703                  * this node may recursively make its parent the only node in
1704                  * the parent level; if so, and if no one is currently using
1705                  * the parent node, this is almost the only opportunity to
1706                  * clean it up.  But the recursive cleanup is not that trivial
1707                  * since the child and parent may be in different lock buckets,
1708                  * which would cause a lock order reversal problem.  To avoid
1709                  * the trouble, we'll dispatch a separate event for batch
1710                  * cleaning.  We need to check whether we're deleting the node
1711                  * as a result of pruning to avoid infinite dispatching.
1712                  * Note: pruning happens only when a task has been set for the
1713                  * rbtdb.  If the user of the rbtdb chooses not to set a task,
1714                  * it's their responsibility to purge stale leaves (e.g. by
1715                  * periodic walk-through).
1716                  */
1717                 if (!pruning && node->parent != NULL &&
1718                     node->parent->down == node && node->left == NULL &&
1719                     node->right == NULL && rbtdb->task != NULL) {
1720                         isc_event_t *ev;
1721                         dns_db_t *db;
1722
1723                         ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1724                                                 DNS_EVENT_RBTPRUNE,
1725                                                 prune_tree, node,
1726                                                 sizeof(isc_event_t));
1727                         if (ev != NULL) {
1728                                 new_reference(rbtdb, node);
1729                                 db = NULL;
1730                                 attach((dns_db_t *)rbtdb, &db);
1731                                 ev->ev_sender = db;
1732                                 isc_task_send(rbtdb->task, &ev);
1733                                 no_reference = ISC_FALSE;
1734                         } else {
1735                                 /*
1736                                  * XXX: this is a weird situation.  We could
1737                                  * ignore this error case, but then the stale
1738                                  * node will unlikely be purged except via a
1739                                  * rare condition such as manual cleanup.  So
1740                                  * we queue it in the deadnodes list, hoping
1741                                  * the memory shortage is temporary and the node
1742                                  * will be deleted later.
1743                                  */
1744                                 isc_log_write(dns_lctx,
1745                                               DNS_LOGCATEGORY_DATABASE,
1746                                               DNS_LOGMODULE_CACHE,
1747                                               ISC_LOG_INFO,
1748                                               "decrement_reference: failed to "
1749                                               "allocate pruning event");
1750                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1751                                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1752                                                 deadlink);
1753                         }
1754                 } else {
1755                         if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1756                                 char printname[DNS_NAME_FORMATSIZE];
1757
1758                                 isc_log_write(dns_lctx,
1759                                               DNS_LOGCATEGORY_DATABASE,
1760                                               DNS_LOGMODULE_CACHE,
1761                                               ISC_LOG_DEBUG(1),
1762                                               "decrement_reference: "
1763                                               "delete from rbt: %p %s",
1764                                               node,
1765                                               dns_rbt_formatnodename(node,
1766                                                         printname,
1767                                                         sizeof(printname)));
1768                         }
1769
1770                         INSIST(!ISC_LINK_LINKED(node, deadlink));
1771                         if (node->nsec3)
1772                                 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1773                                                             ISC_FALSE);
1774                         else
1775                                 result = dns_rbt_deletenode(rbtdb->tree, node,
1776                                                             ISC_FALSE);
1777                         if (result != ISC_R_SUCCESS) {
1778                                 isc_log_write(dns_lctx,
1779                                               DNS_LOGCATEGORY_DATABASE,
1780                                               DNS_LOGMODULE_CACHE,
1781                                               ISC_LOG_WARNING,
1782                                               "decrement_reference: "
1783                                               "dns_rbt_deletenode: %s",
1784                                               isc_result_totext(result));
1785                         }
1786                 }
1787         } else if (dns_rbtnode_refcurrent(node) == 0) {
1788                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1789                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1790         } else
1791                 no_reference = ISC_FALSE;
1792
1793         /* Restore the lock? */
1794         if (nlock == isc_rwlocktype_read)
1795                 NODE_WEAKDOWNGRADE(&nodelock->lock);
1796
1797         /*
1798          * Relock a read lock, or unlock the write lock if no lock was held.
1799          */
1800         if (tlock == isc_rwlocktype_none)
1801                 if (write_locked)
1802                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1803
1804         if (tlock == isc_rwlocktype_read)
1805                 if (write_locked)
1806                         isc_rwlock_downgrade(&rbtdb->tree_lock);
1807
1808         return (no_reference);
1809 }
1810
1811 /*
1812  * Prune the tree by recursively cleaning-up single leaves.  In the worst
1813  * case, the number of iteration is the number of tree levels, which is at
1814  * most the maximum number of domain name labels, i.e, 127.  In practice, this
1815  * should be much smaller (only a few times), and even the worst case would be
1816  * acceptable for a single event.
1817  */
1818 static void
1819 prune_tree(isc_task_t *task, isc_event_t *event) {
1820         dns_rbtdb_t *rbtdb = event->ev_sender;
1821         dns_rbtnode_t *node = event->ev_arg;
1822         dns_rbtnode_t *parent;
1823         unsigned int locknum;
1824
1825         UNUSED(task);
1826
1827         isc_event_free(&event);
1828
1829         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1830         locknum = node->locknum;
1831         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1832         do {
1833                 parent = node->parent;
1834                 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1835                                     isc_rwlocktype_write, ISC_TRUE);
1836
1837                 if (parent != NULL && parent->down == NULL) {
1838                         /*
1839                          * node was the only down child of the parent and has
1840                          * just been removed.  We'll then need to examine the
1841                          * parent.  Keep the lock if possible; otherwise,
1842                          * release the old lock and acquire one for the parent.
1843                          */
1844                         if (parent->locknum != locknum) {
1845                                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1846                                             isc_rwlocktype_write);
1847                                 locknum = parent->locknum;
1848                                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1849                                           isc_rwlocktype_write);
1850                         }
1851
1852                         /*
1853                          * We need to gain a reference to the node before
1854                          * decrementing it in the next iteration.  In addition,
1855                          * if the node is in the dead-nodes list, extract it
1856                          * from the list beforehand as we do in
1857                          * reactivate_node().
1858                          */
1859                         new_reference(rbtdb, parent);
1860                         if (ISC_LINK_LINKED(parent, deadlink)) {
1861                                 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1862                                                 parent, deadlink);
1863                         }
1864                 } else
1865                         parent = NULL;
1866
1867                 node = parent;
1868         } while (node != NULL);
1869         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1870         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1871
1872         detach((dns_db_t **)&rbtdb);
1873 }
1874
1875 static inline void
1876 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1877                    rbtdb_changedlist_t *cleanup_list)
1878 {
1879         /*
1880          * Caller must be holding the database lock.
1881          */
1882
1883         rbtdb->least_serial = version->serial;
1884         *cleanup_list = version->changed_list;
1885         ISC_LIST_INIT(version->changed_list);
1886 }
1887
1888 static inline void
1889 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1890         rbtdb_changed_t *changed, *next_changed;
1891
1892         /*
1893          * If the changed record is dirty, then
1894          * an update created multiple versions of
1895          * a given rdataset.  We keep this list
1896          * until we're the least open version, at
1897          * which point it's safe to get rid of any
1898          * older versions.
1899          *
1900          * If the changed record isn't dirty, then
1901          * we don't need it anymore since we're
1902          * committing and not rolling back.
1903          *
1904          * The caller must be holding the database lock.
1905          */
1906         for (changed = HEAD(version->changed_list);
1907              changed != NULL;
1908              changed = next_changed) {
1909                 next_changed = NEXT(changed, link);
1910                 if (!changed->dirty) {
1911                         UNLINK(version->changed_list,
1912                                changed, link);
1913                         APPEND(*cleanup_list,
1914                                changed, link);
1915                 }
1916         }
1917 }
1918
1919 static void
1920 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1921         dns_rdataset_t keyset;
1922         dns_rdataset_t nsecset, signsecset;
1923         dns_rdata_t rdata = DNS_RDATA_INIT;
1924         isc_boolean_t haszonekey = ISC_FALSE;
1925         isc_boolean_t hasnsec = ISC_FALSE;
1926         isc_boolean_t hasoptbit = ISC_FALSE;
1927         isc_boolean_t nsec3createflag = ISC_FALSE;
1928         isc_result_t result;
1929
1930         dns_rdataset_init(&keyset);
1931         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1932                                      0, 0, &keyset, NULL);
1933         if (result == ISC_R_SUCCESS) {
1934                 dns_rdata_t keyrdata = DNS_RDATA_INIT;
1935                 result = dns_rdataset_first(&keyset);
1936                 while (result == ISC_R_SUCCESS) {
1937                         dns_rdataset_current(&keyset, &keyrdata);
1938                         if (dns_zonekey_iszonekey(&keyrdata)) {
1939                                 haszonekey = ISC_TRUE;
1940                                 break;
1941                         }
1942                         result = dns_rdataset_next(&keyset);
1943                 }
1944                 dns_rdataset_disassociate(&keyset);
1945         }
1946         if (!haszonekey) {
1947                 version->secure = dns_db_insecure;
1948                 version->havensec3 = ISC_FALSE;
1949                 return;
1950         }
1951
1952         dns_rdataset_init(&nsecset);
1953         dns_rdataset_init(&signsecset);
1954         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
1955                                      0, 0, &nsecset, &signsecset);
1956         if (result == ISC_R_SUCCESS) {
1957                 if (dns_rdataset_isassociated(&signsecset)) {
1958                         hasnsec = ISC_TRUE;
1959                         result = dns_rdataset_first(&nsecset);
1960                         if (result == ISC_R_SUCCESS) {
1961                                 dns_rdataset_current(&nsecset, &rdata);
1962                                 hasoptbit = dns_nsec_typepresent(&rdata,
1963                                                              dns_rdatatype_opt);
1964                         }
1965                         dns_rdataset_disassociate(&signsecset);
1966                 }
1967                 dns_rdataset_disassociate(&nsecset);
1968         }
1969
1970         setnsec3parameters(db, version, &nsec3createflag);
1971
1972         /*
1973          * Do we have a valid NSEC/NSEC3 chain?
1974          */
1975         if (version->havensec3 || (hasnsec && !hasoptbit))
1976                 version->secure = dns_db_secure;
1977         /*
1978          * Do we have a NSEC/NSEC3 chain under creation?
1979          */
1980         else if (hasoptbit || nsec3createflag)
1981                 version->secure = dns_db_partial;
1982         else
1983                 version->secure = dns_db_insecure;
1984 }
1985
1986 /*%<
1987  * Walk the origin node looking for NSEC3PARAM records.
1988  * Cache the nsec3 parameters.
1989  */
1990 static void
1991 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
1992                    isc_boolean_t *nsec3createflag)
1993 {
1994         dns_rbtnode_t *node;
1995         dns_rdata_nsec3param_t nsec3param;
1996         dns_rdata_t rdata = DNS_RDATA_INIT;
1997         isc_region_t region;
1998         isc_result_t result;
1999         rdatasetheader_t *header, *header_next;
2000         unsigned char *raw;             /* RDATASLAB */
2001         unsigned int count, length;
2002         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2003
2004         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2005         version->havensec3 = ISC_FALSE;
2006         node = rbtdb->origin_node;
2007         NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2008                   isc_rwlocktype_read);
2009         for (header = node->data;
2010              header != NULL;
2011              header = header_next) {
2012                 header_next = header->next;
2013                 do {
2014                         if (header->serial <= version->serial &&
2015                             !IGNORE(header)) {
2016                                 if (NONEXISTENT(header))
2017                                         header = NULL;
2018                                 break;
2019                         } else
2020                                 header = header->down;
2021                 } while (header != NULL);
2022
2023                 if (header != NULL &&
2024                     header->type == dns_rdatatype_nsec3param) {
2025                         /*
2026                          * Find A NSEC3PARAM with a supported algorithm.
2027                          */
2028                         raw = (unsigned char *)header + sizeof(*header);
2029                         count = raw[0] * 256 + raw[1]; /* count */
2030 #if DNS_RDATASET_FIXED
2031                         raw += count * 4 + 2;
2032 #else
2033                         raw += 2;
2034 #endif
2035                         while (count-- > 0U) {
2036                                 length = raw[0] * 256 + raw[1];
2037 #if DNS_RDATASET_FIXED
2038                                 raw += 4;
2039 #else
2040                                 raw += 2;
2041 #endif
2042                                 region.base = raw;
2043                                 region.length = length;
2044                                 raw += length;
2045                                 dns_rdata_fromregion(&rdata,
2046                                                      rbtdb->common.rdclass,
2047                                                      dns_rdatatype_nsec3param,
2048                                                      &region);
2049                                 result = dns_rdata_tostruct(&rdata,
2050                                                             &nsec3param,
2051                                                             NULL);
2052                                 INSIST(result == ISC_R_SUCCESS);
2053                                 dns_rdata_reset(&rdata);
2054
2055                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2056                                     !dns_nsec3_supportedhash(nsec3param.hash))
2057                                         continue;
2058
2059 #ifdef RFC5155_STRICT
2060                                 if (nsec3param.flags != 0)
2061                                         continue;
2062 #else
2063                                 if ((nsec3param.flags & DNS_NSEC3FLAG_CREATE)
2064                                     != 0)
2065                                         *nsec3createflag = ISC_TRUE;
2066                                 if ((nsec3param.flags & ~DNS_NSEC3FLAG_OPTOUT)
2067                                     != 0)
2068                                         continue;
2069 #endif
2070
2071                                 memcpy(version->salt, nsec3param.salt,
2072                                        nsec3param.salt_length);
2073                                 version->hash = nsec3param.hash;
2074                                 version->salt_length = nsec3param.salt_length;
2075                                 version->iterations = nsec3param.iterations;
2076                                 version->flags = nsec3param.flags;
2077                                 version->havensec3 = ISC_TRUE;
2078                                 /*
2079                                  * Look for a better algorithm than the
2080                                  * unknown test algorithm.
2081                                  */
2082                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2083                                         goto unlock;
2084                         }
2085                 }
2086         }
2087  unlock:
2088         NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2089                     isc_rwlocktype_read);
2090         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2091 }
2092
2093 static void
2094 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2095         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2096         rbtdb_version_t *version, *cleanup_version, *least_greater;
2097         isc_boolean_t rollback = ISC_FALSE;
2098         rbtdb_changedlist_t cleanup_list;
2099         rdatasetheaderlist_t resigned_list;
2100         rbtdb_changed_t *changed, *next_changed;
2101         rbtdb_serial_t serial, least_serial;
2102         dns_rbtnode_t *rbtnode;
2103         unsigned int refs;
2104         rdatasetheader_t *header;
2105         isc_boolean_t writer;
2106
2107         REQUIRE(VALID_RBTDB(rbtdb));
2108         version = (rbtdb_version_t *)*versionp;
2109
2110         cleanup_version = NULL;
2111         ISC_LIST_INIT(cleanup_list);
2112         ISC_LIST_INIT(resigned_list);
2113
2114         isc_refcount_decrement(&version->references, &refs);
2115         if (refs > 0) {         /* typical and easy case first */
2116                 if (commit) {
2117                         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2118                         INSIST(!version->writer);
2119                         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2120                 }
2121                 goto end;
2122         }
2123
2124         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2125         serial = version->serial;
2126         writer = version->writer;
2127         if (version->writer) {
2128                 if (commit) {
2129                         unsigned cur_ref;
2130                         rbtdb_version_t *cur_version;
2131
2132                         INSIST(version->commit_ok);
2133                         INSIST(version == rbtdb->future_version);
2134                         /*
2135                          * The current version is going to be replaced.
2136                          * Release the (likely last) reference to it from the
2137                          * DB itself and unlink it from the open list.
2138                          */
2139                         cur_version = rbtdb->current_version;
2140                         isc_refcount_decrement(&cur_version->references,
2141                                                &cur_ref);
2142                         if (cur_ref == 0) {
2143                                 if (cur_version->serial == rbtdb->least_serial)
2144                                         INSIST(EMPTY(cur_version->changed_list));
2145                                 UNLINK(rbtdb->open_versions,
2146                                        cur_version, link);
2147                         }
2148                         if (EMPTY(rbtdb->open_versions)) {
2149                                 /*
2150                                  * We're going to become the least open
2151                                  * version.
2152                                  */
2153                                 make_least_version(rbtdb, version,
2154                                                    &cleanup_list);
2155                         } else {
2156                                 /*
2157                                  * Some other open version is the
2158                                  * least version.  We can't cleanup
2159                                  * records that were changed in this
2160                                  * version because the older versions
2161                                  * may still be in use by an open
2162                                  * version.
2163                                  *
2164                                  * We can, however, discard the
2165                                  * changed records for things that
2166                                  * we've added that didn't exist in
2167                                  * prior versions.
2168                                  */
2169                                 cleanup_nondirty(version, &cleanup_list);
2170                         }
2171                         /*
2172                          * If the (soon to be former) current version
2173                          * isn't being used by anyone, we can clean
2174                          * it up.
2175                          */
2176                         if (cur_ref == 0) {
2177                                 cleanup_version = cur_version;
2178                                 APPENDLIST(version->changed_list,
2179                                            cleanup_version->changed_list,
2180                                            link);
2181                         }
2182                         /*
2183                          * Become the current version.
2184                          */
2185                         version->writer = ISC_FALSE;
2186                         rbtdb->current_version = version;
2187                         rbtdb->current_serial = version->serial;
2188                         rbtdb->future_version = NULL;
2189
2190                         /*
2191                          * Keep the current version in the open list, and
2192                          * gain a reference for the DB itself (see the DB
2193                          * creation function below).  This must be the only
2194                          * case where we need to increment the counter from
2195                          * zero and need to use isc_refcount_increment0().
2196                          */
2197                         isc_refcount_increment0(&version->references,
2198                                                 &cur_ref);
2199                         INSIST(cur_ref == 1);
2200                         PREPEND(rbtdb->open_versions,
2201                                 rbtdb->current_version, link);
2202                         resigned_list = version->resigned_list;
2203                         ISC_LIST_INIT(version->resigned_list);
2204                 } else {
2205                         /*
2206                          * We're rolling back this transaction.
2207                          */
2208                         cleanup_list = version->changed_list;
2209                         ISC_LIST_INIT(version->changed_list);
2210                         resigned_list = version->resigned_list;
2211                         ISC_LIST_INIT(version->resigned_list);
2212                         rollback = ISC_TRUE;
2213                         cleanup_version = version;
2214                         rbtdb->future_version = NULL;
2215                 }
2216         } else {
2217                 if (version != rbtdb->current_version) {
2218                         /*
2219                          * There are no external or internal references
2220                          * to this version and it can be cleaned up.
2221                          */
2222                         cleanup_version = version;
2223
2224                         /*
2225                          * Find the version with the least serial
2226                          * number greater than ours.
2227                          */
2228                         least_greater = PREV(version, link);
2229                         if (least_greater == NULL)
2230                                 least_greater = rbtdb->current_version;
2231
2232                         INSIST(version->serial < least_greater->serial);
2233                         /*
2234                          * Is this the least open version?
2235                          */
2236                         if (version->serial == rbtdb->least_serial) {
2237                                 /*
2238                                  * Yes.  Install the new least open
2239                                  * version.
2240                                  */
2241                                 make_least_version(rbtdb,
2242                                                    least_greater,
2243                                                    &cleanup_list);
2244                         } else {
2245                                 /*
2246                                  * Add any unexecuted cleanups to
2247                                  * those of the least greater version.
2248                                  */
2249                                 APPENDLIST(least_greater->changed_list,
2250                                            version->changed_list,
2251                                            link);
2252                         }
2253                 } else if (version->serial == rbtdb->least_serial)
2254                         INSIST(EMPTY(version->changed_list));
2255                 UNLINK(rbtdb->open_versions, version, link);
2256         }
2257         least_serial = rbtdb->least_serial;
2258         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2259
2260         /*
2261          * Update the zone's secure status.
2262          */
2263         if (writer && commit && !IS_CACHE(rbtdb))
2264                 iszonesecure(db, version, rbtdb->origin_node);
2265
2266         if (cleanup_version != NULL) {
2267                 INSIST(EMPTY(cleanup_version->changed_list));
2268                 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2269                             sizeof(*cleanup_version));
2270         }
2271
2272         /*
2273          * Commit/rollback re-signed headers.
2274          */
2275         for (header = HEAD(resigned_list);
2276              header != NULL;
2277              header = HEAD(resigned_list)) {
2278                 nodelock_t *lock;
2279
2280                 ISC_LIST_UNLINK(resigned_list, header, link);
2281
2282                 lock = &rbtdb->node_locks[header->node->locknum].lock;
2283                 NODE_LOCK(lock, isc_rwlocktype_write);
2284                 if (rollback)
2285                         resign_insert(rbtdb, header->node->locknum, header);
2286                 decrement_reference(rbtdb, header->node, least_serial,
2287                                     isc_rwlocktype_write, isc_rwlocktype_none,
2288                                     ISC_FALSE);
2289                 NODE_UNLOCK(lock, isc_rwlocktype_write);
2290         }
2291
2292         if (!EMPTY(cleanup_list)) {
2293                 /*
2294                  * We acquire a tree write lock here in order to make sure
2295                  * that stale nodes will be removed in decrement_reference().
2296                  * If we didn't have the lock, those nodes could miss the
2297                  * chance to be removed until the server stops.  The write lock
2298                  * is expensive, but this event should be rare enough to justify
2299                  * the cost.
2300                  */
2301                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2302                 for (changed = HEAD(cleanup_list);
2303                      changed != NULL;
2304                      changed = next_changed) {
2305                         nodelock_t *lock;
2306
2307                         next_changed = NEXT(changed, link);
2308                         rbtnode = changed->node;
2309                         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2310
2311                         NODE_LOCK(lock, isc_rwlocktype_write);
2312                         /*
2313                          * This is a good opportunity to purge any dead nodes,
2314                          * so use it.
2315                          */
2316                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2317
2318                         if (rollback)
2319                                 rollback_node(rbtnode, serial);
2320                         decrement_reference(rbtdb, rbtnode, least_serial,
2321                                             isc_rwlocktype_write,
2322                                             isc_rwlocktype_write, ISC_FALSE);
2323
2324                         NODE_UNLOCK(lock, isc_rwlocktype_write);
2325
2326                         isc_mem_put(rbtdb->common.mctx, changed,
2327                                     sizeof(*changed));
2328                 }
2329                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2330         }
2331
2332  end:
2333         *versionp = NULL;
2334 }
2335
2336 /*
2337  * Add the necessary magic for the wildcard name 'name'
2338  * to be found in 'rbtdb'.
2339  *
2340  * In order for wildcard matching to work correctly in
2341  * zone_find(), we must ensure that a node for the wildcarding
2342  * level exists in the database, and has its 'find_callback'
2343  * and 'wild' bits set.
2344  *
2345  * E.g. if the wildcard name is "*.sub.example." then we
2346  * must ensure that "sub.example." exists and is marked as
2347  * a wildcard level.
2348  */
2349 static isc_result_t
2350 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2351         isc_result_t result;
2352         dns_name_t foundname;
2353         dns_offsets_t offsets;
2354         unsigned int n;
2355         dns_rbtnode_t *node = NULL;
2356
2357         dns_name_init(&foundname, offsets);
2358         n = dns_name_countlabels(name);
2359         INSIST(n >= 2);
2360         n--;
2361         dns_name_getlabelsequence(name, 1, n, &foundname);
2362         result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2363         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2364                 return (result);
2365         node->nsec3 = 0;
2366         node->find_callback = 1;
2367         node->wild = 1;
2368         return (ISC_R_SUCCESS);
2369 }
2370
2371 static isc_result_t
2372 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2373         isc_result_t result;
2374         dns_name_t foundname;
2375         dns_offsets_t offsets;
2376         unsigned int n, l, i;
2377
2378         dns_name_init(&foundname, offsets);
2379         n = dns_name_countlabels(name);
2380         l = dns_name_countlabels(&rbtdb->common.origin);
2381         i = l + 1;
2382         while (i < n) {
2383                 dns_rbtnode_t *node = NULL;     /* dummy */
2384                 dns_name_getlabelsequence(name, n - i, i, &foundname);
2385                 if (dns_name_iswildcard(&foundname)) {
2386                         result = add_wildcard_magic(rbtdb, &foundname);
2387                         if (result != ISC_R_SUCCESS)
2388                                 return (result);
2389                         result = dns_rbt_addnode(rbtdb->tree, &foundname,
2390                                                  &node);
2391                         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2392                                 return (result);
2393                         node->nsec3 = 0;
2394                 }
2395                 i++;
2396         }
2397         return (ISC_R_SUCCESS);
2398 }
2399
2400 static isc_result_t
2401 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2402          dns_dbnode_t **nodep)
2403 {
2404         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2405         dns_rbtnode_t *node = NULL;
2406         dns_name_t nodename;
2407         isc_result_t result;
2408         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2409
2410         REQUIRE(VALID_RBTDB(rbtdb));
2411
2412         dns_name_init(&nodename, NULL);
2413         RWLOCK(&rbtdb->tree_lock, locktype);
2414         result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2415                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2416         if (result != ISC_R_SUCCESS) {
2417                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2418                 if (!create) {
2419                         if (result == DNS_R_PARTIALMATCH)
2420                                 result = ISC_R_NOTFOUND;
2421                         return (result);
2422                 }
2423                 /*
2424                  * It would be nice to try to upgrade the lock instead of
2425                  * unlocking then relocking.
2426                  */
2427                 locktype = isc_rwlocktype_write;
2428                 RWLOCK(&rbtdb->tree_lock, locktype);
2429                 node = NULL;
2430                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2431                 if (result == ISC_R_SUCCESS) {
2432                         dns_rbt_namefromnode(node, &nodename);
2433 #ifdef DNS_RBT_USEHASH
2434                         node->locknum = node->hashval % rbtdb->node_lock_count;
2435 #else
2436                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2437                                 rbtdb->node_lock_count;
2438 #endif
2439                         node->nsec3 = 0;
2440                         add_empty_wildcards(rbtdb, name);
2441
2442                         if (dns_name_iswildcard(name)) {
2443                                 result = add_wildcard_magic(rbtdb, name);
2444                                 if (result != ISC_R_SUCCESS) {
2445                                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2446                                         return (result);
2447                                 }
2448                         }
2449                 } else if (result != ISC_R_EXISTS) {
2450                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2451                         return (result);
2452                 }
2453         }
2454         reactivate_node(rbtdb, node, locktype);
2455         RWUNLOCK(&rbtdb->tree_lock, locktype);
2456
2457         *nodep = (dns_dbnode_t *)node;
2458
2459         return (ISC_R_SUCCESS);
2460 }
2461
2462 static isc_result_t
2463 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2464               dns_dbnode_t **nodep)
2465 {
2466         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2467         dns_rbtnode_t *node = NULL;
2468         dns_name_t nodename;
2469         isc_result_t result;
2470         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2471
2472         REQUIRE(VALID_RBTDB(rbtdb));
2473
2474         dns_name_init(&nodename, NULL);
2475         RWLOCK(&rbtdb->tree_lock, locktype);
2476         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, &node, NULL,
2477                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2478         if (result != ISC_R_SUCCESS) {
2479                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2480                 if (!create) {
2481                         if (result == DNS_R_PARTIALMATCH)
2482                                 result = ISC_R_NOTFOUND;
2483                         return (result);
2484                 }
2485                 /*
2486                  * It would be nice to try to upgrade the lock instead of
2487                  * unlocking then relocking.
2488                  */
2489                 locktype = isc_rwlocktype_write;
2490                 RWLOCK(&rbtdb->tree_lock, locktype);
2491                 node = NULL;
2492                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
2493                 if (result == ISC_R_SUCCESS) {
2494                         dns_rbt_namefromnode(node, &nodename);
2495 #ifdef DNS_RBT_USEHASH
2496                         node->locknum = node->hashval % rbtdb->node_lock_count;
2497 #else
2498                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2499                                 rbtdb->node_lock_count;
2500 #endif
2501                         node->nsec3 = 1U;
2502                 } else if (result != ISC_R_EXISTS) {
2503                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2504                         return (result);
2505                 }
2506         } else
2507                 INSIST(node->nsec3);
2508         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
2509         new_reference(rbtdb, node);
2510         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
2511         RWUNLOCK(&rbtdb->tree_lock, locktype);
2512
2513         *nodep = (dns_dbnode_t *)node;
2514
2515         return (ISC_R_SUCCESS);
2516 }
2517
2518 static isc_result_t
2519 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2520         rbtdb_search_t *search = arg;
2521         rdatasetheader_t *header, *header_next;
2522         rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2523         rdatasetheader_t *found;
2524         isc_result_t result;
2525         dns_rbtnode_t *onode;
2526
2527         /*
2528          * We only want to remember the topmost zone cut, since it's the one
2529          * that counts, so we'll just continue if we've already found a
2530          * zonecut.
2531          */
2532         if (search->zonecut != NULL)
2533                 return (DNS_R_CONTINUE);
2534
2535         found = NULL;
2536         result = DNS_R_CONTINUE;
2537         onode = search->rbtdb->origin_node;
2538
2539         NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2540                   isc_rwlocktype_read);
2541
2542         /*
2543          * Look for an NS or DNAME rdataset active in our version.
2544          */
2545         ns_header = NULL;
2546         dname_header = NULL;
2547         sigdname_header = NULL;
2548         for (header = node->data; header != NULL; header = header_next) {
2549                 header_next = header->next;
2550                 if (header->type == dns_rdatatype_ns ||
2551                     header->type == dns_rdatatype_dname ||
2552                     header->type == RBTDB_RDATATYPE_SIGDNAME) {
2553                         do {
2554                                 if (header->serial <= search->serial &&
2555                                     !IGNORE(header)) {
2556                                         /*
2557                                          * Is this a "this rdataset doesn't
2558                                          * exist" record?
2559                                          */
2560                                         if (NONEXISTENT(header))
2561                                                 header = NULL;
2562                                         break;
2563                                 } else
2564                                         header = header->down;
2565                         } while (header != NULL);
2566                         if (header != NULL) {
2567                                 if (header->type == dns_rdatatype_dname)
2568                                         dname_header = header;
2569                                 else if (header->type ==
2570                                            RBTDB_RDATATYPE_SIGDNAME)
2571                                         sigdname_header = header;
2572                                 else if (node != onode ||
2573                                          IS_STUB(search->rbtdb)) {
2574                                         /*
2575                                          * We've found an NS rdataset that
2576                                          * isn't at the origin node.  We check
2577                                          * that they're not at the origin node,
2578                                          * because otherwise we'd erroneously
2579                                          * treat the zone top as if it were
2580                                          * a delegation.
2581                                          */
2582                                         ns_header = header;
2583                                 }
2584                         }
2585                 }
2586         }
2587
2588         /*
2589          * Did we find anything?
2590          */
2591         if (dname_header != NULL) {
2592                 /*
2593                  * Note that DNAME has precedence over NS if both exist.
2594                  */
2595                 found = dname_header;
2596                 search->zonecut_sigrdataset = sigdname_header;
2597         } else if (ns_header != NULL) {
2598                 found = ns_header;
2599                 search->zonecut_sigrdataset = NULL;
2600         }
2601
2602         if (found != NULL) {
2603                 /*
2604                  * We increment the reference count on node to ensure that
2605                  * search->zonecut_rdataset will still be valid later.
2606                  */
2607                 new_reference(search->rbtdb, node);
2608                 search->zonecut = node;
2609                 search->zonecut_rdataset = found;
2610                 search->need_cleanup = ISC_TRUE;
2611                 /*
2612                  * Since we've found a zonecut, anything beneath it is
2613                  * glue and is not subject to wildcard matching, so we
2614                  * may clear search->wild.
2615                  */
2616                 search->wild = ISC_FALSE;
2617                 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2618                         /*
2619                          * If the caller does not want to find glue, then
2620                          * this is the best answer and the search should
2621                          * stop now.
2622                          */
2623                         result = DNS_R_PARTIALMATCH;
2624                 } else {
2625                         dns_name_t *zcname;
2626
2627                         /*
2628                          * The search will continue beneath the zone cut.
2629                          * This may or may not be the best match.  In case it
2630                          * is, we need to remember the node name.
2631                          */
2632                         zcname = dns_fixedname_name(&search->zonecut_name);
2633                         RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2634                                       ISC_R_SUCCESS);
2635                         search->copy_name = ISC_TRUE;
2636                 }
2637         } else {
2638                 /*
2639                  * There is no zonecut at this node which is active in this
2640                  * version.
2641                  *
2642                  * If this is a "wild" node and the caller hasn't disabled
2643                  * wildcard matching, remember that we've seen a wild node
2644                  * in case we need to go searching for wildcard matches
2645                  * later on.
2646                  */
2647                 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2648                         search->wild = ISC_TRUE;
2649         }
2650
2651         NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2652                     isc_rwlocktype_read);
2653
2654         return (result);
2655 }
2656
2657 static inline void
2658 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2659               rdatasetheader_t *header, isc_stdtime_t now,
2660               dns_rdataset_t *rdataset)
2661 {
2662         unsigned char *raw;     /* RDATASLAB */
2663
2664         /*
2665          * Caller must be holding the node reader lock.
2666          * XXXJT: technically, we need a writer lock, since we'll increment
2667          * the header count below.  However, since the actual counter value
2668          * doesn't matter, we prioritize performance here.  (We may want to
2669          * use atomic increment when available).
2670          */
2671
2672         if (rdataset == NULL)
2673                 return;
2674
2675         new_reference(rbtdb, node);
2676
2677         INSIST(rdataset->methods == NULL);      /* We must be disassociated. */
2678
2679         rdataset->methods = &rdataset_methods;
2680         rdataset->rdclass = rbtdb->common.rdclass;
2681         rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2682         rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2683         rdataset->ttl = header->rdh_ttl - now;
2684         rdataset->trust = header->trust;
2685         if (NXDOMAIN(header))
2686                 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2687         if (OPTOUT(header))
2688                 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2689         rdataset->private1 = rbtdb;
2690         rdataset->private2 = node;
2691         raw = (unsigned char *)header + sizeof(*header);
2692         rdataset->private3 = raw;
2693         rdataset->count = header->count++;
2694         if (rdataset->count == ISC_UINT32_MAX)
2695                 rdataset->count = 0;
2696
2697         /*
2698          * Reset iterator state.
2699          */
2700         rdataset->privateuint4 = 0;
2701         rdataset->private5 = NULL;
2702
2703         /*
2704          * Add noqname proof.
2705          */
2706         rdataset->private6 = header->noqname;
2707         if (rdataset->private6 != NULL)
2708                 rdataset->attributes |=  DNS_RDATASETATTR_NOQNAME;
2709         rdataset->private7 = header->closest;
2710         if (rdataset->private7 != NULL)
2711                 rdataset->attributes |=  DNS_RDATASETATTR_CLOSEST;
2712
2713         /*
2714          * Copy out re-signing information.
2715          */
2716         if (RESIGN(header)) {
2717                 rdataset->attributes |=  DNS_RDATASETATTR_RESIGN;
2718                 rdataset->resign = header->resign;
2719         } else
2720                 rdataset->resign = 0;
2721 }
2722
2723 static inline isc_result_t
2724 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2725                  dns_name_t *foundname, dns_rdataset_t *rdataset,
2726                  dns_rdataset_t *sigrdataset)
2727 {
2728         isc_result_t result;
2729         dns_name_t *zcname;
2730         rbtdb_rdatatype_t type;
2731         dns_rbtnode_t *node;
2732
2733         /*
2734          * The caller MUST NOT be holding any node locks.
2735          */
2736
2737         node = search->zonecut;
2738         type = search->zonecut_rdataset->type;
2739
2740         /*
2741          * If we have to set foundname, we do it before anything else.
2742          * If we were to set foundname after we had set nodep or bound the
2743          * rdataset, then we'd have to undo that work if dns_name_copy()
2744          * failed.  By setting foundname first, there's nothing to undo if
2745          * we have trouble.
2746          */
2747         if (foundname != NULL && search->copy_name) {
2748                 zcname = dns_fixedname_name(&search->zonecut_name);
2749                 result = dns_name_copy(zcname, foundname, NULL);
2750                 if (result != ISC_R_SUCCESS)
2751                         return (result);
2752         }
2753         if (nodep != NULL) {
2754                 /*
2755                  * Note that we don't have to increment the node's reference
2756                  * count here because we're going to use the reference we
2757                  * already have in the search block.
2758                  */
2759                 *nodep = node;
2760                 search->need_cleanup = ISC_FALSE;
2761         }
2762         if (rdataset != NULL) {
2763                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2764                           isc_rwlocktype_read);
2765                 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2766                               search->now, rdataset);
2767                 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2768                         bind_rdataset(search->rbtdb, node,
2769                                       search->zonecut_sigrdataset,
2770                                       search->now, sigrdataset);
2771                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2772                             isc_rwlocktype_read);
2773         }
2774
2775         if (type == dns_rdatatype_dname)
2776                 return (DNS_R_DNAME);
2777         return (DNS_R_DELEGATION);
2778 }
2779
2780 static inline isc_boolean_t
2781 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2782            dns_rbtnode_t *node)
2783 {
2784         unsigned char *raw;     /* RDATASLAB */
2785         unsigned int count, size;
2786         dns_name_t ns_name;
2787         isc_boolean_t valid = ISC_FALSE;
2788         dns_offsets_t offsets;
2789         isc_region_t region;
2790         rdatasetheader_t *header;
2791
2792         /*
2793          * No additional locking is required.
2794          */
2795
2796         /*
2797          * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
2798          * if it occurs at a zone cut, but is not valid below it.
2799          */
2800         if (type == dns_rdatatype_ns) {
2801                 if (node != search->zonecut) {
2802                         return (ISC_FALSE);
2803                 }
2804         } else if (type != dns_rdatatype_a &&
2805                    type != dns_rdatatype_aaaa &&
2806                    type != dns_rdatatype_a6) {
2807                 return (ISC_FALSE);
2808         }
2809
2810         header = search->zonecut_rdataset;
2811         raw = (unsigned char *)header + sizeof(*header);
2812         count = raw[0] * 256 + raw[1];
2813 #if DNS_RDATASET_FIXED
2814         raw += 2 + (4 * count);
2815 #else
2816         raw += 2;
2817 #endif
2818
2819         while (count > 0) {
2820                 count--;
2821                 size = raw[0] * 256 + raw[1];
2822 #if DNS_RDATASET_FIXED
2823                 raw += 4;
2824 #else
2825                 raw += 2;
2826 #endif
2827                 region.base = raw;
2828                 region.length = size;
2829                 raw += size;
2830                 /*
2831                  * XXX Until we have rdata structures, we have no choice but
2832                  * to directly access the rdata format.
2833                  */
2834                 dns_name_init(&ns_name, offsets);
2835                 dns_name_fromregion(&ns_name, &region);
2836                 if (dns_name_compare(&ns_name, name) == 0) {
2837                         valid = ISC_TRUE;
2838                         break;
2839                 }
2840         }
2841
2842         return (valid);
2843 }
2844
2845 static inline isc_boolean_t
2846 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2847             dns_name_t *name)
2848 {
2849         dns_fixedname_t fnext;
2850         dns_fixedname_t forigin;
2851         dns_name_t *next;
2852         dns_name_t *origin;
2853         dns_name_t prefix;
2854         dns_rbtdb_t *rbtdb;
2855         dns_rbtnode_t *node;
2856         isc_result_t result;
2857         isc_boolean_t answer = ISC_FALSE;
2858         rdatasetheader_t *header;
2859
2860         rbtdb = search->rbtdb;
2861
2862         dns_name_init(&prefix, NULL);
2863         dns_fixedname_init(&fnext);
2864         next = dns_fixedname_name(&fnext);
2865         dns_fixedname_init(&forigin);
2866         origin = dns_fixedname_name(&forigin);
2867
2868         result = dns_rbtnodechain_next(chain, NULL, NULL);
2869         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2870                 node = NULL;
2871                 result = dns_rbtnodechain_current(chain, &prefix,
2872                                                   origin, &node);
2873                 if (result != ISC_R_SUCCESS)
2874                         break;
2875                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2876                           isc_rwlocktype_read);
2877                 for (header = node->data;
2878                      header != NULL;
2879                      header = header->next) {
2880                         if (header->serial <= search->serial &&
2881                             !IGNORE(header) && EXISTS(header))
2882                                 break;
2883                 }
2884                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2885                             isc_rwlocktype_read);
2886                 if (header != NULL)
2887                         break;
2888                 result = dns_rbtnodechain_next(chain, NULL, NULL);
2889         }
2890         if (result == ISC_R_SUCCESS)
2891                 result = dns_name_concatenate(&prefix, origin, next, NULL);
2892         if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
2893                 answer = ISC_TRUE;
2894         return (answer);
2895 }
2896
2897 static inline isc_boolean_t
2898 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2899         dns_fixedname_t fnext;
2900         dns_fixedname_t forigin;
2901         dns_fixedname_t fprev;
2902         dns_name_t *next;
2903         dns_name_t *origin;
2904         dns_name_t *prev;
2905         dns_name_t name;
2906         dns_name_t rname;
2907         dns_name_t tname;
2908         dns_rbtdb_t *rbtdb;
2909         dns_rbtnode_t *node;
2910         dns_rbtnodechain_t chain;
2911         isc_boolean_t check_next = ISC_TRUE;
2912         isc_boolean_t check_prev = ISC_TRUE;
2913         isc_boolean_t answer = ISC_FALSE;
2914         isc_result_t result;
2915         rdatasetheader_t *header;
2916         unsigned int n;
2917
2918         rbtdb = search->rbtdb;
2919
2920         dns_name_init(&name, NULL);
2921         dns_name_init(&tname, NULL);
2922         dns_name_init(&rname, NULL);
2923         dns_fixedname_init(&fnext);
2924         next = dns_fixedname_name(&fnext);
2925         dns_fixedname_init(&fprev);
2926         prev = dns_fixedname_name(&fprev);
2927         dns_fixedname_init(&forigin);
2928         origin = dns_fixedname_name(&forigin);
2929
2930         /*
2931          * Find if qname is at or below a empty node.
2932          * Use our own copy of the chain.
2933          */
2934
2935         chain = search->chain;
2936         do {
2937                 node = NULL;
2938                 result = dns_rbtnodechain_current(&chain, &name,
2939                                                   origin, &node);
2940                 if (result != ISC_R_SUCCESS)
2941                         break;
2942                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2943                           isc_rwlocktype_read);
2944                 for (header = node->data;
2945                      header != NULL;
2946                      header = header->next) {
2947                         if (header->serial <= search->serial &&
2948                             !IGNORE(header) && EXISTS(header))
2949                                 break;
2950                 }
2951                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2952                             isc_rwlocktype_read);
2953                 if (header != NULL)
2954                         break;
2955                 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
2956         } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
2957         if (result == ISC_R_SUCCESS)
2958                 result = dns_name_concatenate(&name, origin, prev, NULL);
2959         if (result != ISC_R_SUCCESS)
2960                 check_prev = ISC_FALSE;
2961
2962         result = dns_rbtnodechain_next(&chain, NULL, NULL);
2963         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2964                 node = NULL;
2965                 result = dns_rbtnodechain_current(&chain, &name,
2966                                                   origin, &node);
2967                 if (result != ISC_R_SUCCESS)
2968                         break;
2969                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2970                           isc_rwlocktype_read);
2971                 for (header = node->data;
2972                      header != NULL;
2973                      header = header->next) {
2974                         if (header->serial <= search->serial &&
2975                             !IGNORE(header) && EXISTS(header))
2976                                 break;
2977                 }
2978                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2979                             isc_rwlocktype_read);
2980                 if (header != NULL)
2981                         break;
2982                 result = dns_rbtnodechain_next(&chain, NULL, NULL);
2983         }
2984         if (result == ISC_R_SUCCESS)
2985                 result = dns_name_concatenate(&name, origin, next, NULL);
2986         if (result != ISC_R_SUCCESS)
2987                 check_next = ISC_FALSE;
2988
2989         dns_name_clone(qname, &rname);
2990
2991         /*
2992          * Remove the wildcard label to find the terminal name.
2993          */
2994         n = dns_name_countlabels(wname);
2995         dns_name_getlabelsequence(wname, 1, n - 1, &tname);
2996
2997         do {
2998                 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
2999                     (check_next && dns_name_issubdomain(next, &rname))) {
3000                         answer = ISC_TRUE;
3001                         break;
3002                 }
3003                 /*
3004                  * Remove the left hand label.
3005                  */
3006                 n = dns_name_countlabels(&rname);
3007                 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3008         } while (!dns_name_equal(&rname, &tname));
3009         return (answer);
3010 }
3011
3012 static inline isc_result_t
3013 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3014               dns_name_t *qname)
3015 {
3016         unsigned int i, j;
3017         dns_rbtnode_t *node, *level_node, *wnode;
3018         rdatasetheader_t *header;
3019         isc_result_t result = ISC_R_NOTFOUND;
3020         dns_name_t name;
3021         dns_name_t *wname;
3022         dns_fixedname_t fwname;
3023         dns_rbtdb_t *rbtdb;
3024         isc_boolean_t done, wild, active;
3025         dns_rbtnodechain_t wchain;
3026
3027         /*
3028          * Caller must be holding the tree lock and MUST NOT be holding
3029          * any node locks.
3030          */
3031
3032         /*
3033          * Examine each ancestor level.  If the level's wild bit
3034          * is set, then construct the corresponding wildcard name and
3035          * search for it.  If the wildcard node exists, and is active in
3036          * this version, we're done.  If not, then we next check to see
3037          * if the ancestor is active in this version.  If so, then there
3038          * can be no possible wildcard match and again we're done.  If not,
3039          * continue the search.
3040          */
3041
3042         rbtdb = search->rbtdb;
3043         i = search->chain.level_matches;
3044         done = ISC_FALSE;
3045         node = *nodep;
3046         do {
3047                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3048                           isc_rwlocktype_read);
3049
3050                 /*
3051                  * First we try to figure out if this node is active in
3052                  * the search's version.  We do this now, even though we
3053                  * may not need the information, because it simplifies the
3054                  * locking and code flow.
3055                  */
3056                 for (header = node->data;
3057                      header != NULL;
3058                      header = header->next) {
3059                         if (header->serial <= search->serial &&
3060                             !IGNORE(header) && EXISTS(header))
3061                                 break;
3062                 }
3063                 if (header != NULL)
3064                         active = ISC_TRUE;
3065                 else
3066                         active = ISC_FALSE;
3067
3068                 if (node->wild)
3069                         wild = ISC_TRUE;
3070                 else
3071                         wild = ISC_FALSE;
3072
3073                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3074                             isc_rwlocktype_read);
3075
3076                 if (wild) {
3077                         /*
3078                          * Construct the wildcard name for this level.
3079                          */
3080                         dns_name_init(&name, NULL);
3081                         dns_rbt_namefromnode(node, &name);
3082                         dns_fixedname_init(&fwname);
3083                         wname = dns_fixedname_name(&fwname);
3084                         result = dns_name_concatenate(dns_wildcardname, &name,
3085                                                       wname, NULL);
3086                         j = i;
3087                         while (result == ISC_R_SUCCESS && j != 0) {
3088                                 j--;
3089                                 level_node = search->chain.levels[j];
3090                                 dns_name_init(&name, NULL);
3091                                 dns_rbt_namefromnode(level_node, &name);
3092                                 result = dns_name_concatenate(wname,
3093                                                               &name,
3094                                                               wname,
3095                                                               NULL);
3096                         }
3097                         if (result != ISC_R_SUCCESS)
3098                                 break;
3099
3100                         wnode = NULL;
3101                         dns_rbtnodechain_init(&wchain, NULL);
3102                         result = dns_rbt_findnode(rbtdb->tree, wname,
3103                                                   NULL, &wnode, &wchain,
3104                                                   DNS_RBTFIND_EMPTYDATA,
3105                                                   NULL, NULL);
3106                         if (result == ISC_R_SUCCESS) {
3107                                 nodelock_t *lock;
3108
3109                                 /*
3110                                  * We have found the wildcard node.  If it
3111                                  * is active in the search's version, we're
3112                                  * done.
3113                                  */
3114                                 lock = &rbtdb->node_locks[wnode->locknum].lock;
3115                                 NODE_LOCK(lock, isc_rwlocktype_read);
3116                                 for (header = wnode->data;
3117                                      header != NULL;
3118                                      header = header->next) {
3119                                         if (header->serial <= search->serial &&
3120                                             !IGNORE(header) && EXISTS(header))
3121                                                 break;
3122                                 }
3123                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3124                                 if (header != NULL ||
3125                                     activeempty(search, &wchain, wname)) {
3126                                         if (activeemtpynode(search, qname,
3127                                                             wname)) {
3128                                                 return (ISC_R_NOTFOUND);
3129                                         }
3130                                         /*
3131                                          * The wildcard node is active!
3132                                          *
3133                                          * Note: result is still ISC_R_SUCCESS
3134                                          * so we don't have to set it.
3135                                          */
3136                                         *nodep = wnode;
3137                                         break;
3138                                 }
3139                         } else if (result != ISC_R_NOTFOUND &&
3140                                    result != DNS_R_PARTIALMATCH) {
3141                                 /*
3142                                  * An error has occurred.  Bail out.
3143                                  */
3144                                 break;
3145                         }
3146                 }
3147
3148                 if (active) {
3149                         /*
3150                          * The level node is active.  Any wildcarding
3151                          * present at higher levels has no
3152                          * effect and we're done.
3153                          */
3154                         result = ISC_R_NOTFOUND;
3155                         break;
3156                 }
3157
3158                 if (i > 0) {
3159                         i--;
3160                         node = search->chain.levels[i];
3161                 } else
3162                         done = ISC_TRUE;
3163         } while (!done);
3164
3165         return (result);
3166 }
3167
3168 static isc_boolean_t
3169 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3170 {
3171         dns_rdata_t rdata = DNS_RDATA_INIT;
3172         dns_rdata_nsec3_t nsec3;
3173         unsigned char *raw;                     /* RDATASLAB */
3174         unsigned int rdlen, count;
3175         isc_region_t region;
3176         isc_result_t result;
3177
3178         REQUIRE(header->type == dns_rdatatype_nsec3);
3179
3180         raw = (unsigned char *)header + sizeof(*header);
3181         count = raw[0] * 256 + raw[1]; /* count */
3182 #if DNS_RDATASET_FIXED
3183         raw += count * 4 + 2;
3184 #else
3185         raw += 2;
3186 #endif
3187         while (count-- > 0) {
3188                 rdlen = raw[0] * 256 + raw[1];
3189 #if DNS_RDATASET_FIXED
3190                 raw += 4;
3191 #else
3192                 raw += 2;
3193 #endif
3194                 region.base = raw;
3195                 region.length = rdlen;
3196                 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3197                                      dns_rdatatype_nsec3, &region);
3198                 raw += rdlen;
3199                 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3200                 INSIST(result == ISC_R_SUCCESS);
3201                 if (nsec3.hash == search->rbtversion->hash &&
3202                     nsec3.iterations == search->rbtversion->iterations &&
3203                     nsec3.salt_length == search->rbtversion->salt_length &&
3204                     memcmp(nsec3.salt, search->rbtversion->salt,
3205                            nsec3.salt_length) == 0)
3206                         return (ISC_TRUE);
3207                 dns_rdata_reset(&rdata);
3208         }
3209         return (ISC_FALSE);
3210 }
3211
3212 static inline isc_result_t
3213 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3214                   dns_name_t *foundname, dns_rdataset_t *rdataset,
3215                   dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3216                   dns_db_secure_t secure)
3217 {
3218         dns_rbtnode_t *node;
3219         rdatasetheader_t *header, *header_next, *found, *foundsig;
3220         isc_boolean_t empty_node;
3221         isc_result_t result;
3222         dns_fixedname_t fname, forigin;
3223         dns_name_t *name, *origin;
3224         dns_rdatatype_t type;
3225         rbtdb_rdatatype_t sigtype;
3226         isc_boolean_t wraps;
3227         isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3228
3229         if (tree == search->rbtdb->nsec3) {
3230                 type = dns_rdatatype_nsec3;
3231                 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3232                 wraps = ISC_TRUE;
3233         } else {
3234                 type = dns_rdatatype_nsec;
3235                 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3236                 wraps = ISC_FALSE;
3237         }
3238
3239  again:
3240         do {
3241                 node = NULL;
3242                 dns_fixedname_init(&fname);
3243                 name = dns_fixedname_name(&fname);
3244                 dns_fixedname_init(&forigin);
3245                 origin = dns_fixedname_name(&forigin);
3246                 result = dns_rbtnodechain_current(&search->chain, name,
3247                                                   origin, &node);
3248                 if (result != ISC_R_SUCCESS)
3249                         return (result);
3250                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3251                           isc_rwlocktype_read);
3252                 found = NULL;
3253                 foundsig = NULL;
3254                 empty_node = ISC_TRUE;
3255                 for (header = node->data;
3256                      header != NULL;
3257                      header = header_next) {
3258                         header_next = header->next;
3259                         /*
3260                          * Look for an active, extant NSEC or RRSIG NSEC.
3261                          */
3262                         do {
3263                                 if (header->serial <= search->serial &&
3264                                     !IGNORE(header)) {
3265                                         /*
3266                                          * Is this a "this rdataset doesn't
3267                                          * exist" record?
3268                                          */
3269                                         if (NONEXISTENT(header))
3270                                                 header = NULL;
3271                                         break;
3272                                 } else
3273                                         header = header->down;
3274                         } while (header != NULL);
3275                         if (header != NULL) {
3276                                 /*
3277                                  * We now know that there is at least one
3278                                  * active rdataset at this node.
3279                                  */
3280                                 empty_node = ISC_FALSE;
3281                                 if (header->type == type) {
3282                                         found = header;
3283                                         if (foundsig != NULL)
3284                                                 break;
3285                                 } else if (header->type == sigtype) {
3286                                         foundsig = header;
3287                                         if (found != NULL)
3288                                                 break;
3289                                 }
3290                         }
3291                 }
3292                 if (!empty_node) {
3293                         if (found != NULL && search->rbtversion->havensec3 &&
3294                             found->type == dns_rdatatype_nsec3 &&
3295                             !matchparams(found, search)) {
3296                                 empty_node = ISC_TRUE;
3297                                 found = NULL;
3298                                 foundsig = NULL;
3299                                 result = dns_rbtnodechain_prev(&search->chain,
3300                                                                NULL, NULL);
3301                         } else if (found != NULL &&
3302                                    (foundsig != NULL || !need_sig))
3303                         {
3304                                 /*
3305                                  * We've found the right NSEC/NSEC3 record.
3306                                  *
3307                                  * Note: for this to really be the right
3308                                  * NSEC record, it's essential that the NSEC
3309                                  * records of any nodes obscured by a zone
3310                                  * cut have been removed; we assume this is
3311                                  * the case.
3312                                  */
3313                                 result = dns_name_concatenate(name, origin,
3314                                                               foundname, NULL);
3315                                 if (result == ISC_R_SUCCESS) {
3316                                         if (nodep != NULL) {
3317                                                 new_reference(search->rbtdb,
3318                                                               node);
3319                                                 *nodep = node;
3320                                         }
3321                                         bind_rdataset(search->rbtdb, node,
3322                                                       found, search->now,
3323                                                       rdataset);
3324                                         if (foundsig != NULL)
3325                                                 bind_rdataset(search->rbtdb,
3326                                                               node,
3327                                                               foundsig,
3328                                                               search->now,
3329                                                               sigrdataset);
3330                                 }
3331                         } else if (found == NULL && foundsig == NULL) {
3332                                 /*
3333                                  * This node is active, but has no NSEC or
3334                                  * RRSIG NSEC.  That means it's glue or
3335                                  * other obscured zone data that isn't
3336                                  * relevant for our search.  Treat the
3337                                  * node as if it were empty and keep looking.
3338                                  */
3339                                 empty_node = ISC_TRUE;
3340                                 result = dns_rbtnodechain_prev(&search->chain,
3341                                                                NULL, NULL);
3342                         } else {
3343                                 /*
3344                                  * We found an active node, but either the
3345                                  * NSEC or the RRSIG NSEC is missing.  This
3346                                  * shouldn't happen.
3347                                  */
3348                                 result = DNS_R_BADDB;
3349                         }
3350                 } else {
3351                         /*
3352                          * This node isn't active.  We've got to keep
3353                          * looking.
3354                          */
3355                         result = dns_rbtnodechain_prev(&search->chain, NULL,
3356                                                        NULL);
3357                 }
3358                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3359                             isc_rwlocktype_read);
3360         } while (empty_node && result == ISC_R_SUCCESS);
3361
3362         if (result == ISC_R_NOMORE && wraps) {
3363                 result = dns_rbtnodechain_last(&search->chain, tree,
3364                                                NULL, NULL);
3365                 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3366                         wraps = ISC_FALSE;
3367                         goto again;
3368                 }
3369         }
3370
3371         /*
3372          * If the result is ISC_R_NOMORE, then we got to the beginning of
3373          * the database and didn't find a NSEC record.  This shouldn't
3374          * happen.
3375          */
3376         if (result == ISC_R_NOMORE)
3377                 result = DNS_R_BADDB;
3378
3379         return (result);
3380 }
3381
3382 static isc_result_t
3383 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3384           dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3385           dns_dbnode_t **nodep, dns_name_t *foundname,
3386           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3387 {
3388         dns_rbtnode_t *node = NULL;
3389         isc_result_t result;
3390         rbtdb_search_t search;
3391         isc_boolean_t cname_ok = ISC_TRUE;
3392         isc_boolean_t close_version = ISC_FALSE;
3393         isc_boolean_t maybe_zonecut = ISC_FALSE;
3394         isc_boolean_t at_zonecut = ISC_FALSE;
3395         isc_boolean_t wild;
3396         isc_boolean_t empty_node;
3397         rdatasetheader_t *header, *header_next, *found, *nsecheader;
3398         rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3399         rbtdb_rdatatype_t sigtype;
3400         isc_boolean_t active;
3401         dns_rbtnodechain_t chain;
3402         nodelock_t *lock;
3403         dns_rbt_t *tree;
3404
3405         search.rbtdb = (dns_rbtdb_t *)db;
3406
3407         REQUIRE(VALID_RBTDB(search.rbtdb));
3408
3409         /*
3410          * We don't care about 'now'.
3411          */
3412         UNUSED(now);
3413
3414         /*
3415          * If the caller didn't supply a version, attach to the current
3416          * version.
3417          */
3418         if (version == NULL) {
3419                 currentversion(db, &version);
3420                 close_version = ISC_TRUE;
3421         }
3422
3423         search.rbtversion = version;
3424         search.serial = search.rbtversion->serial;
3425         search.options = options;
3426         search.copy_name = ISC_FALSE;
3427         search.need_cleanup = ISC_FALSE;
3428         search.wild = ISC_FALSE;
3429         search.zonecut = NULL;
3430         dns_fixedname_init(&search.zonecut_name);
3431         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3432         search.now = 0;
3433
3434         /*
3435          * 'wild' will be true iff. we've matched a wildcard.
3436          */
3437         wild = ISC_FALSE;
3438
3439         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3440
3441         /*
3442          * Search down from the root of the tree.  If, while going down, we
3443          * encounter a callback node, zone_zonecut_callback() will search the
3444          * rdatasets at the zone cut for active DNAME or NS rdatasets.
3445          */
3446         tree =  (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3447                                                          search.rbtdb->tree;
3448         result = dns_rbt_findnode(tree, name, foundname, &node,
3449                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
3450                                   zone_zonecut_callback, &search);
3451
3452         if (result == DNS_R_PARTIALMATCH) {
3453         partial_match:
3454                 if (search.zonecut != NULL) {
3455                     result = setup_delegation(&search, nodep, foundname,
3456                                               rdataset, sigrdataset);
3457                     goto tree_exit;
3458                 }
3459
3460                 if (search.wild) {
3461                         /*
3462                          * At least one of the levels in the search chain
3463                          * potentially has a wildcard.  For each such level,
3464                          * we must see if there's a matching wildcard active
3465                          * in the current version.
3466                          */
3467                         result = find_wildcard(&search, &node, name);
3468                         if (result == ISC_R_SUCCESS) {
3469                                 result = dns_name_copy(name, foundname, NULL);
3470                                 if (result != ISC_R_SUCCESS)
3471                                         goto tree_exit;
3472                                 wild = ISC_TRUE;
3473                                 goto found;
3474                         }
3475                         else if (result != ISC_R_NOTFOUND)
3476                                 goto tree_exit;
3477                 }
3478
3479                 chain = search.chain;
3480                 active = activeempty(&search, &chain, name);
3481
3482                 /*
3483                  * If we're here, then the name does not exist, is not
3484                  * beneath a zonecut, and there's no matching wildcard.
3485                  */
3486                 if ((search.rbtversion->secure == dns_db_secure &&
3487                      !search.rbtversion->havensec3) ||
3488                     (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3489                     (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3490                 {
3491                         result = find_closest_nsec(&search, nodep, foundname,
3492                                                    rdataset, sigrdataset, tree,
3493                                                    search.rbtversion->secure);
3494                         if (result == ISC_R_SUCCESS)
3495                                 result = active ? DNS_R_EMPTYNAME :
3496                                                   DNS_R_NXDOMAIN;
3497                 } else
3498                         result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3499                 goto tree_exit;
3500         } else if (result != ISC_R_SUCCESS)
3501                 goto tree_exit;
3502
3503  found:
3504         /*
3505          * We have found a node whose name is the desired name, or we
3506          * have matched a wildcard.
3507          */
3508
3509         if (search.zonecut != NULL) {
3510                 /*
3511                  * If we're beneath a zone cut, we don't want to look for
3512                  * CNAMEs because they're not legitimate zone glue.
3513                  */
3514                 cname_ok = ISC_FALSE;
3515         } else {
3516                 /*
3517                  * The node may be a zone cut itself.  If it might be one,
3518                  * make sure we check for it later.
3519                  *
3520                  * DS records live above the zone cut in ordinary zone so
3521                  * we want to ignore any referral.
3522                  *
3523                  * Stub zones don't have anything "above" the delegation so
3524                  * we always return a referral.
3525                  */
3526                 if (node->find_callback &&
3527                     ((node != search.rbtdb->origin_node &&
3528                       !dns_rdatatype_atparent(type)) ||
3529                      IS_STUB(search.rbtdb)))
3530                         maybe_zonecut = ISC_TRUE;
3531         }
3532
3533         /*
3534          * Certain DNSSEC types are not subject to CNAME matching
3535          * (RFC4035, section 2.5 and RFC3007).
3536          *
3537          * We don't check for RRSIG, because we don't store RRSIG records
3538          * directly.
3539          */
3540         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3541                 cname_ok = ISC_FALSE;
3542
3543         /*
3544          * We now go looking for rdata...
3545          */
3546
3547         lock = &search.rbtdb->node_locks[node->locknum].lock;
3548         NODE_LOCK(lock, isc_rwlocktype_read);
3549
3550         found = NULL;
3551         foundsig = NULL;
3552         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3553         nsecheader = NULL;
3554         nsecsig = NULL;
3555         cnamesig = NULL;
3556         empty_node = ISC_TRUE;
3557         for (header = node->data; header != NULL; header = header_next) {
3558                 header_next = header->next;
3559                 /*
3560                  * Look for an active, extant rdataset.
3561                  */
3562                 do {
3563                         if (header->serial <= search.serial &&
3564                             !IGNORE(header)) {
3565                                 /*
3566                                  * Is this a "this rdataset doesn't
3567                                  * exist" record?
3568                                  */
3569                                 if (NONEXISTENT(header))
3570                                         header = NULL;
3571                                 break;
3572                         } else
3573                                 header = header->down;
3574                 } while (header != NULL);
3575                 if (header != NULL) {
3576                         /*
3577                          * We now know that there is at least one active
3578                          * rdataset at this node.
3579                          */
3580                         empty_node = ISC_FALSE;
3581
3582                         /*
3583                          * Do special zone cut handling, if requested.
3584                          */
3585                         if (maybe_zonecut &&
3586                             header->type == dns_rdatatype_ns) {
3587                                 /*
3588                                  * We increment the reference count on node to
3589                                  * ensure that search->zonecut_rdataset will
3590                                  * still be valid later.
3591                                  */
3592                                 new_reference(search.rbtdb, node);
3593                                 search.zonecut = node;
3594                                 search.zonecut_rdataset = header;
3595                                 search.zonecut_sigrdataset = NULL;
3596                                 search.need_cleanup = ISC_TRUE;
3597                                 maybe_zonecut = ISC_FALSE;
3598                                 at_zonecut = ISC_TRUE;
3599                                 /*
3600                                  * It is not clear if KEY should still be
3601                                  * allowed at the parent side of the zone
3602                                  * cut or not.  It is needed for RFC3007
3603                                  * validated updates.
3604                                  */
3605                                 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3606                                     && type != dns_rdatatype_nsec
3607                                     && type != dns_rdatatype_key) {
3608                                         /*
3609                                          * Glue is not OK, but any answer we
3610                                          * could return would be glue.  Return
3611                                          * the delegation.
3612                                          */
3613                                         found = NULL;
3614                                         break;
3615                                 }
3616                                 if (found != NULL && foundsig != NULL)
3617                                         break;
3618                         }
3619
3620
3621                         /*
3622                          * If the NSEC3 record doesn't match the chain
3623                          * we are using behave as if it isn't here.
3624                          */
3625                         if (header->type == dns_rdatatype_nsec3 &&
3626                            !matchparams(header, &search)) {
3627                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3628                                 goto partial_match;
3629                         }
3630                         /*
3631                          * If we found a type we were looking for,
3632                          * remember it.
3633                          */
3634                         if (header->type == type ||
3635                             type == dns_rdatatype_any ||
3636                             (header->type == dns_rdatatype_cname &&
3637                              cname_ok)) {
3638                                 /*
3639                                  * We've found the answer!
3640                                  */
3641                                 found = header;
3642                                 if (header->type == dns_rdatatype_cname &&
3643                                     cname_ok) {
3644                                         /*
3645                                          * We may be finding a CNAME instead
3646                                          * of the desired type.
3647                                          *
3648                                          * If we've already got the CNAME RRSIG,
3649                                          * use it, otherwise change sigtype
3650                                          * so that we find it.
3651                                          */
3652                                         if (cnamesig != NULL)
3653                                                 foundsig = cnamesig;
3654                                         else
3655                                                 sigtype =
3656                                                     RBTDB_RDATATYPE_SIGCNAME;
3657                                 }
3658                                 /*
3659                                  * If we've got all we need, end the search.
3660                                  */
3661                                 if (!maybe_zonecut && foundsig != NULL)
3662                                         break;
3663                         } else if (header->type == sigtype) {
3664                                 /*
3665                                  * We've found the RRSIG rdataset for our
3666                                  * target type.  Remember it.
3667                                  */
3668                                 foundsig = header;
3669                                 /*
3670                                  * If we've got all we need, end the search.
3671                                  */
3672                                 if (!maybe_zonecut && found != NULL)
3673                                         break;
3674                         } else if (header->type == dns_rdatatype_nsec &&
3675                                    !search.rbtversion->havensec3) {
3676                                 /*
3677                                  * Remember a NSEC rdataset even if we're
3678                                  * not specifically looking for it, because
3679                                  * we might need it later.
3680                                  */
3681                                 nsecheader = header;
3682                         } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3683                                    !search.rbtversion->havensec3) {
3684                                 /*
3685                                  * If we need the NSEC rdataset, we'll also
3686                                  * need its signature.
3687                                  */
3688                                 nsecsig = header;
3689                         } else if (cname_ok &&
3690                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
3691                                 /*
3692                                  * If we get a CNAME match, we'll also need
3693                                  * its signature.
3694                                  */
3695                                 cnamesig = header;
3696                         }
3697                 }
3698         }
3699
3700         if (empty_node) {
3701                 /*
3702                  * We have an exact match for the name, but there are no
3703                  * active rdatasets in the desired version.  That means that
3704                  * this node doesn't exist in the desired version, and that
3705                  * we really have a partial match.
3706                  */
3707                 if (!wild) {
3708                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3709                         goto partial_match;
3710                 }
3711         }
3712
3713         /*
3714          * If we didn't find what we were looking for...
3715          */
3716         if (found == NULL) {
3717                 if (search.zonecut != NULL) {
3718                         /*
3719                          * We were trying to find glue at a node beneath a
3720                          * zone cut, but didn't.
3721                          *
3722                          * Return the delegation.
3723                          */
3724                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3725                         result = setup_delegation(&search, nodep, foundname,
3726                                                   rdataset, sigrdataset);
3727                         goto tree_exit;
3728                 }
3729                 /*
3730                  * The desired type doesn't exist.
3731                  */
3732                 result = DNS_R_NXRRSET;
3733                 if (search.rbtversion->secure == dns_db_secure &&
3734                     !search.rbtversion->havensec3 &&
3735                     (nsecheader == NULL || nsecsig == NULL)) {
3736                         /*
3737                          * The zone is secure but there's no NSEC,
3738                          * or the NSEC has no signature!
3739                          */
3740                         if (!wild) {
3741                                 result = DNS_R_BADDB;
3742                                 goto node_exit;
3743                         }
3744
3745                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3746                         result = find_closest_nsec(&search, nodep, foundname,
3747                                                    rdataset, sigrdataset,
3748                                                    search.rbtdb->tree,
3749                                                    search.rbtversion->secure);
3750                         if (result == ISC_R_SUCCESS)
3751                                 result = DNS_R_EMPTYWILD;
3752                         goto tree_exit;
3753                 }
3754                 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3755                     nsecheader == NULL)
3756                 {
3757                         /*
3758                          * There's no NSEC record, and we were told
3759                          * to find one.
3760                          */
3761                         result = DNS_R_BADDB;
3762                         goto node_exit;
3763                 }
3764                 if (nodep != NULL) {
3765                         new_reference(search.rbtdb, node);
3766                         *nodep = node;
3767                 }
3768                 if ((search.rbtversion->secure == dns_db_secure &&
3769                      !search.rbtversion->havensec3) ||
3770                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
3771                 {
3772                         bind_rdataset(search.rbtdb, node, nsecheader,
3773                                       0, rdataset);
3774                         if (nsecsig != NULL)
3775                                 bind_rdataset(search.rbtdb, node,
3776                                               nsecsig, 0, sigrdataset);
3777                 }
3778                 if (wild)
3779                         foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3780                 goto node_exit;
3781         }
3782
3783         /*
3784          * We found what we were looking for, or we found a CNAME.
3785          */
3786
3787         if (type != found->type &&
3788             type != dns_rdatatype_any &&
3789             found->type == dns_rdatatype_cname) {
3790                 /*
3791                  * We weren't doing an ANY query and we found a CNAME instead
3792                  * of the type we were looking for, so we need to indicate
3793                  * that result to the caller.
3794                  */
3795                 result = DNS_R_CNAME;
3796         } else if (search.zonecut != NULL) {
3797                 /*
3798                  * If we're beneath a zone cut, we must indicate that the
3799                  * result is glue, unless we're actually at the zone cut
3800                  * and the type is NSEC or KEY.
3801                  */
3802                 if (search.zonecut == node) {
3803                         /*
3804                          * It is not clear if KEY should still be
3805                          * allowed at the parent side of the zone
3806                          * cut or not.  It is needed for RFC3007
3807                          * validated updates.
3808                          */
3809                         if (type == dns_rdatatype_nsec ||
3810                             type == dns_rdatatype_nsec3 ||
3811                             type == dns_rdatatype_key)
3812                                 result = ISC_R_SUCCESS;
3813                         else if (type == dns_rdatatype_any)
3814                                 result = DNS_R_ZONECUT;
3815                         else
3816                                 result = DNS_R_GLUE;
3817                 } else
3818                         result = DNS_R_GLUE;
3819                 /*
3820                  * We might have found data that isn't glue, but was occluded
3821                  * by a dynamic update.  If the caller cares about this, they
3822                  * will have told us to validate glue.
3823                  *
3824                  * XXX We should cache the glue validity state!
3825                  */
3826                 if (result == DNS_R_GLUE &&
3827                     (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
3828                     !valid_glue(&search, foundname, type, node)) {
3829                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3830                         result = setup_delegation(&search, nodep, foundname,
3831                                                   rdataset, sigrdataset);
3832                     goto tree_exit;
3833                 }
3834         } else {
3835                 /*
3836                  * An ordinary successful query!
3837                  */
3838                 result = ISC_R_SUCCESS;
3839         }
3840
3841         if (nodep != NULL) {
3842                 if (!at_zonecut)
3843                         new_reference(search.rbtdb, node);
3844                 else
3845                         search.need_cleanup = ISC_FALSE;
3846                 *nodep = node;
3847         }
3848
3849         if (type != dns_rdatatype_any) {
3850                 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
3851                 if (foundsig != NULL)
3852                         bind_rdataset(search.rbtdb, node, foundsig, 0,
3853                                       sigrdataset);
3854         }
3855
3856         if (wild)
3857                 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3858
3859  node_exit:
3860         NODE_UNLOCK(lock, isc_rwlocktype_read);
3861
3862  tree_exit:
3863         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3864
3865         /*
3866          * If we found a zonecut but aren't going to use it, we have to
3867          * let go of it.
3868          */
3869         if (search.need_cleanup) {
3870                 node = search.zonecut;
3871                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3872
3873                 NODE_LOCK(lock, isc_rwlocktype_read);
3874                 decrement_reference(search.rbtdb, node, 0,
3875                                     isc_rwlocktype_read, isc_rwlocktype_none,
3876                                     ISC_FALSE);
3877                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3878         }
3879
3880         if (close_version)
3881                 closeversion(db, &version, ISC_FALSE);
3882
3883         dns_rbtnodechain_reset(&search.chain);
3884
3885         return (result);
3886 }
3887
3888 static isc_result_t
3889 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
3890                  isc_stdtime_t now, dns_dbnode_t **nodep,
3891                  dns_name_t *foundname,
3892                  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3893 {
3894         UNUSED(db);
3895         UNUSED(name);
3896         UNUSED(options);
3897         UNUSED(now);
3898         UNUSED(nodep);
3899         UNUSED(foundname);
3900         UNUSED(rdataset);
3901         UNUSED(sigrdataset);
3902
3903         FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
3904
3905         return (ISC_R_NOTIMPLEMENTED);
3906 }
3907
3908 static isc_result_t
3909 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
3910         rbtdb_search_t *search = arg;
3911         rdatasetheader_t *header, *header_prev, *header_next;
3912         rdatasetheader_t *dname_header, *sigdname_header;
3913         isc_result_t result;
3914         nodelock_t *lock;
3915         isc_rwlocktype_t locktype;
3916
3917         /* XXX comment */
3918
3919         REQUIRE(search->zonecut == NULL);
3920
3921         /*
3922          * Keep compiler silent.
3923          */
3924         UNUSED(name);
3925
3926         lock = &(search->rbtdb->node_locks[node->locknum].lock);
3927         locktype = isc_rwlocktype_read;
3928         NODE_LOCK(lock, locktype);
3929
3930         /*
3931          * Look for a DNAME or RRSIG DNAME rdataset.
3932          */
3933         dname_header = NULL;
3934         sigdname_header = NULL;
3935         header_prev = NULL;
3936         for (header = node->data; header != NULL; header = header_next) {
3937                 header_next = header->next;
3938                 if (header->rdh_ttl <= search->now) {
3939                         /*
3940                          * This rdataset is stale.  If no one else is
3941                          * using the node, we can clean it up right
3942                          * now, otherwise we mark it as stale, and
3943                          * the node as dirty, so it will get cleaned
3944                          * up later.
3945                          */
3946                         if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
3947                             (locktype == isc_rwlocktype_write ||
3948                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3949                                 /*
3950                                  * We update the node's status only when we
3951                                  * can get write access; otherwise, we leave
3952                                  * others to this work.  Periodical cleaning
3953                                  * will eventually take the job as the last
3954                                  * resort.
3955                                  * We won't downgrade the lock, since other
3956                                  * rdatasets are probably stale, too.
3957                                  */
3958                                 locktype = isc_rwlocktype_write;
3959
3960                                 if (dns_rbtnode_refcurrent(node) == 0) {
3961                                         isc_mem_t *mctx;
3962
3963                                         /*
3964                                          * header->down can be non-NULL if the
3965                                          * refcount has just decremented to 0
3966                                          * but decrement_reference() has not
3967                                          * performed clean_cache_node(), in
3968                                          * which case we need to purge the
3969                                          * stale headers first.
3970                                          */
3971                                         mctx = search->rbtdb->common.mctx;
3972                                         clean_stale_headers(search->rbtdb,
3973                                                             mctx,
3974                                                             header);
3975                                         if (header_prev != NULL)
3976                                                 header_prev->next =
3977                                                         header->next;
3978                                         else
3979                                                 node->data = header->next;
3980                                         free_rdataset(search->rbtdb, mctx,
3981                                                       header);
3982                                 } else {
3983                                         header->attributes |=
3984                                                 RDATASET_ATTR_STALE;
3985                                         node->dirty = 1;
3986                                         header_prev = header;
3987                                 }
3988                         } else
3989                                 header_prev = header;
3990                 } else if (header->type == dns_rdatatype_dname &&
3991                            EXISTS(header)) {
3992                         dname_header = header;
3993                         header_prev = header;
3994                 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
3995                          EXISTS(header)) {
3996                         sigdname_header = header;
3997                         header_prev = header;
3998                 } else
3999                         header_prev = header;
4000         }
4001
4002         if (dname_header != NULL &&
4003             (!DNS_TRUST_PENDING(dname_header->trust) ||
4004              (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4005                 /*
4006                  * We increment the reference count on node to ensure that
4007                  * search->zonecut_rdataset will still be valid later.
4008                  */
4009                 new_reference(search->rbtdb, node);
4010                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4011                 search->zonecut = node;
4012                 search->zonecut_rdataset = dname_header;
4013                 search->zonecut_sigrdataset = sigdname_header;
4014                 search->need_cleanup = ISC_TRUE;
4015                 result = DNS_R_PARTIALMATCH;
4016         } else
4017                 result = DNS_R_CONTINUE;
4018
4019         NODE_UNLOCK(lock, locktype);
4020
4021         return (result);
4022 }
4023
4024 static inline isc_result_t
4025 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4026                      dns_dbnode_t **nodep, dns_name_t *foundname,
4027                      dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4028 {
4029         unsigned int i;
4030         dns_rbtnode_t *level_node;
4031         rdatasetheader_t *header, *header_prev, *header_next;
4032         rdatasetheader_t *found, *foundsig;
4033         isc_result_t result = ISC_R_NOTFOUND;
4034         dns_name_t name;
4035         dns_rbtdb_t *rbtdb;
4036         isc_boolean_t done;
4037         nodelock_t *lock;
4038         isc_rwlocktype_t locktype;
4039
4040         /*
4041          * Caller must be holding the tree lock.
4042          */
4043
4044         rbtdb = search->rbtdb;
4045         i = search->chain.level_matches;
4046         done = ISC_FALSE;
4047         do {
4048                 locktype = isc_rwlocktype_read;
4049                 lock = &rbtdb->node_locks[node->locknum].lock;
4050                 NODE_LOCK(lock, locktype);
4051
4052                 /*
4053                  * Look for NS and RRSIG NS rdatasets.
4054                  */
4055                 found = NULL;
4056                 foundsig = NULL;
4057                 header_prev = NULL;
4058                 for (header = node->data;
4059                      header != NULL;
4060                      header = header_next) {
4061                         header_next = header->next;
4062                         if (header->rdh_ttl <= search->now) {
4063                                 /*
4064                                  * This rdataset is stale.  If no one else is
4065                                  * using the node, we can clean it up right
4066                                  * now, otherwise we mark it as stale, and
4067                                  * the node as dirty, so it will get cleaned
4068                                  * up later.
4069                                  */
4070                                 if ((header->rdh_ttl <= search->now -
4071                                                     RBTDB_VIRTUAL) &&
4072                                     (locktype == isc_rwlocktype_write ||
4073                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4074                                         /*
4075                                          * We update the node's status only
4076                                          * when we can get write access.
4077                                          */
4078                                         locktype = isc_rwlocktype_write;
4079
4080                                         if (dns_rbtnode_refcurrent(node)
4081                                             == 0) {
4082                                                 isc_mem_t *m;
4083
4084                                                 m = search->rbtdb->common.mctx;
4085                                                 clean_stale_headers(
4086                                                         search->rbtdb,
4087                                                         m, header);
4088                                                 if (header_prev != NULL)
4089                                                         header_prev->next =
4090                                                                 header->next;
4091                                                 else
4092                                                         node->data =
4093                                                                 header->next;
4094                                                 free_rdataset(rbtdb, m,
4095                                                               header);
4096                                         } else {
4097                                                 header->attributes |=
4098                                                         RDATASET_ATTR_STALE;
4099                                                 node->dirty = 1;
4100                                                 header_prev = header;
4101                                         }
4102                                 } else
4103                                         header_prev = header;
4104                         } else if (EXISTS(header)) {
4105                                 /*
4106                                  * We've found an extant rdataset.  See if
4107                                  * we're interested in it.
4108                                  */
4109                                 if (header->type == dns_rdatatype_ns) {
4110                                         found = header;
4111                                         if (foundsig != NULL)
4112                                                 break;
4113                                 } else if (header->type ==
4114                                            RBTDB_RDATATYPE_SIGNS) {
4115                                         foundsig = header;
4116                                         if (found != NULL)
4117                                                 break;
4118                                 }
4119                                 header_prev = header;
4120                         } else
4121                                 header_prev = header;
4122                 }
4123
4124                 if (found != NULL) {
4125                         /*
4126                          * If we have to set foundname, we do it before
4127                          * anything else.  If we were to set foundname after
4128                          * we had set nodep or bound the rdataset, then we'd
4129                          * have to undo that work if dns_name_concatenate()
4130                          * failed.  By setting foundname first, there's
4131                          * nothing to undo if we have trouble.
4132                          */
4133                         if (foundname != NULL) {
4134                                 dns_name_init(&name, NULL);
4135                                 dns_rbt_namefromnode(node, &name);
4136                                 result = dns_name_copy(&name, foundname, NULL);
4137                                 while (result == ISC_R_SUCCESS && i > 0) {
4138                                         i--;
4139                                         level_node = search->chain.levels[i];
4140                                         dns_name_init(&name, NULL);
4141                                         dns_rbt_namefromnode(level_node,
4142                                                              &name);
4143                                         result =
4144                                                 dns_name_concatenate(foundname,
4145                                                                      &name,
4146                                                                      foundname,
4147                                                                      NULL);
4148                                 }
4149                                 if (result != ISC_R_SUCCESS) {
4150                                         *nodep = NULL;
4151                                         goto node_exit;
4152                                 }
4153                         }
4154                         result = DNS_R_DELEGATION;
4155                         if (nodep != NULL) {
4156                                 new_reference(search->rbtdb, node);
4157                                 *nodep = node;
4158                         }
4159                         bind_rdataset(search->rbtdb, node, found, search->now,
4160                                       rdataset);
4161                         if (foundsig != NULL)
4162                                 bind_rdataset(search->rbtdb, node, foundsig,
4163                                               search->now, sigrdataset);
4164                         if (need_headerupdate(found, search->now) ||
4165                             (foundsig != NULL &&
4166                              need_headerupdate(foundsig, search->now))) {
4167                                 if (locktype != isc_rwlocktype_write) {
4168                                         NODE_UNLOCK(lock, locktype);
4169                                         NODE_LOCK(lock, isc_rwlocktype_write);
4170                                         locktype = isc_rwlocktype_write;
4171                                 }
4172                                 if (need_headerupdate(found, search->now))
4173                                         update_header(search->rbtdb, found,
4174                                                       search->now);
4175                                 if (foundsig != NULL &&
4176                                     need_headerupdate(foundsig, search->now)) {
4177                                         update_header(search->rbtdb, foundsig,
4178                                                       search->now);
4179                                 }
4180                         }
4181                 }
4182
4183         node_exit:
4184                 NODE_UNLOCK(lock, locktype);
4185
4186                 if (found == NULL && i > 0) {
4187                         i--;
4188                         node = search->chain.levels[i];
4189                 } else
4190                         done = ISC_TRUE;
4191
4192         } while (!done);
4193
4194         return (result);
4195 }
4196
4197 static isc_result_t
4198 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4199                   isc_stdtime_t now, dns_name_t *foundname,
4200                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4201 {
4202         dns_rbtnode_t *node;
4203         rdatasetheader_t *header, *header_next, *header_prev;
4204         rdatasetheader_t *found, *foundsig;
4205         isc_boolean_t empty_node;
4206         isc_result_t result;
4207         dns_fixedname_t fname, forigin;
4208         dns_name_t *name, *origin;
4209         rbtdb_rdatatype_t matchtype, sigmatchtype;
4210         nodelock_t *lock;
4211         isc_rwlocktype_t locktype;
4212
4213         matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4214         sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4215                                              dns_rdatatype_nsec);
4216
4217         do {
4218                 node = NULL;
4219                 dns_fixedname_init(&fname);
4220                 name = dns_fixedname_name(&fname);
4221                 dns_fixedname_init(&forigin);
4222                 origin = dns_fixedname_name(&forigin);
4223                 result = dns_rbtnodechain_current(&search->chain, name,
4224                                                   origin, &node);
4225                 if (result != ISC_R_SUCCESS)
4226                         return (result);
4227                 locktype = isc_rwlocktype_read;
4228                 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4229                 NODE_LOCK(lock, locktype);
4230                 found = NULL;
4231                 foundsig = NULL;
4232                 empty_node = ISC_TRUE;
4233                 header_prev = NULL;
4234                 for (header = node->data;
4235                      header != NULL;
4236                      header = header_next) {
4237                         header_next = header->next;
4238                         if (header->rdh_ttl <= now) {
4239                                 /*
4240                                  * This rdataset is stale.  If no one else is
4241                                  * using the node, we can clean it up right
4242                                  * now, otherwise we mark it as stale, and the
4243                                  * node as dirty, so it will get cleaned up
4244                                  * later.
4245                                  */
4246                                 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4247                                     (locktype == isc_rwlocktype_write ||
4248                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4249                                         /*
4250                                          * We update the node's status only
4251                                          * when we can get write access.
4252                                          */
4253                                         locktype = isc_rwlocktype_write;
4254
4255                                         if (dns_rbtnode_refcurrent(node)
4256                                             == 0) {
4257                                                 isc_mem_t *m;
4258
4259                                                 m = search->rbtdb->common.mctx;
4260                                                 clean_stale_headers(
4261                                                         search->rbtdb,
4262                                                         m, header);
4263                                                 if (header_prev != NULL)
4264                                                         header_prev->next =
4265                                                                 header->next;
4266                                                 else
4267                                                         node->data = header->next;
4268                                                 free_rdataset(search->rbtdb, m,
4269                                                               header);
4270                                         } else {
4271                                                 header->attributes |=
4272                                                         RDATASET_ATTR_STALE;
4273                                                 node->dirty = 1;
4274                                                 header_prev = header;
4275                                         }
4276                                 } else
4277                                         header_prev = header;
4278                                 continue;
4279                         }
4280                         if (NONEXISTENT(header) ||
4281                             RBTDB_RDATATYPE_BASE(header->type) == 0) {
4282                                 header_prev = header;
4283                                 continue;
4284                         }
4285                         empty_node = ISC_FALSE;
4286                         if (header->type == matchtype)
4287                                 found = header;
4288                         else if (header->type == sigmatchtype)
4289                                 foundsig = header;
4290                         header_prev = header;
4291                 }
4292                 if (found != NULL) {
4293                         result = dns_name_concatenate(name, origin,
4294                                                       foundname, NULL);
4295                         if (result != ISC_R_SUCCESS)
4296                                 goto unlock_node;
4297                         bind_rdataset(search->rbtdb, node, found,
4298                                       now, rdataset);
4299                         if (foundsig != NULL)
4300                                 bind_rdataset(search->rbtdb, node, foundsig,
4301                                               now, sigrdataset);
4302                         new_reference(search->rbtdb, node);
4303                         *nodep = node;
4304                         result = DNS_R_COVERINGNSEC;
4305                 } else if (!empty_node) {
4306                         result = ISC_R_NOTFOUND;
4307                 } else
4308                         result = dns_rbtnodechain_prev(&search->chain, NULL,
4309                                                        NULL);
4310  unlock_node:
4311                 NODE_UNLOCK(lock, locktype);
4312         } while (empty_node && result == ISC_R_SUCCESS);
4313         return (result);
4314 }
4315
4316 static isc_result_t
4317 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4318            dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4319            dns_dbnode_t **nodep, dns_name_t *foundname,
4320            dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4321 {
4322         dns_rbtnode_t *node = NULL;
4323         isc_result_t result;
4324         rbtdb_search_t search;
4325         isc_boolean_t cname_ok = ISC_TRUE;
4326         isc_boolean_t empty_node;
4327         nodelock_t *lock;
4328         isc_rwlocktype_t locktype;
4329         rdatasetheader_t *header, *header_prev, *header_next;
4330         rdatasetheader_t *found, *nsheader;
4331         rdatasetheader_t *foundsig, *nssig, *cnamesig;
4332         rdatasetheader_t *update, *updatesig;
4333         rbtdb_rdatatype_t sigtype, negtype;
4334
4335         UNUSED(version);
4336
4337         search.rbtdb = (dns_rbtdb_t *)db;
4338
4339         REQUIRE(VALID_RBTDB(search.rbtdb));
4340         REQUIRE(version == NULL);
4341
4342         if (now == 0)
4343                 isc_stdtime_get(&now);
4344
4345         search.rbtversion = NULL;
4346         search.serial = 1;
4347         search.options = options;
4348         search.copy_name = ISC_FALSE;
4349         search.need_cleanup = ISC_FALSE;
4350         search.wild = ISC_FALSE;
4351         search.zonecut = NULL;
4352         dns_fixedname_init(&search.zonecut_name);
4353         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4354         search.now = now;
4355         update = NULL;
4356         updatesig = NULL;
4357
4358         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4359
4360         /*
4361          * Search down from the root of the tree.  If, while going down, we
4362          * encounter a callback node, cache_zonecut_callback() will search the
4363          * rdatasets at the zone cut for a DNAME rdataset.
4364          */
4365         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4366                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
4367                                   cache_zonecut_callback, &search);
4368
4369         if (result == DNS_R_PARTIALMATCH) {
4370                 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4371                         result = find_coveringnsec(&search, nodep, now,
4372                                                    foundname, rdataset,
4373                                                    sigrdataset);
4374                         if (result == DNS_R_COVERINGNSEC)
4375                                 goto tree_exit;
4376                 }
4377                 if (search.zonecut != NULL) {
4378                     result = setup_delegation(&search, nodep, foundname,
4379                                               rdataset, sigrdataset);
4380                     goto tree_exit;
4381                 } else {
4382                 find_ns:
4383                         result = find_deepest_zonecut(&search, node, nodep,
4384                                                       foundname, rdataset,
4385                                                       sigrdataset);
4386                         goto tree_exit;
4387                 }
4388         } else if (result != ISC_R_SUCCESS)
4389                 goto tree_exit;
4390
4391         /*
4392          * Certain DNSSEC types are not subject to CNAME matching
4393          * (RFC4035, section 2.5 and RFC3007).
4394          *
4395          * We don't check for RRSIG, because we don't store RRSIG records
4396          * directly.
4397          */
4398         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4399                 cname_ok = ISC_FALSE;
4400
4401         /*
4402          * We now go looking for rdata...
4403          */
4404
4405         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4406         locktype = isc_rwlocktype_read;
4407         NODE_LOCK(lock, locktype);
4408
4409         found = NULL;
4410         foundsig = NULL;
4411         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4412         negtype = RBTDB_RDATATYPE_VALUE(0, type);
4413         nsheader = NULL;
4414         nssig = NULL;
4415         cnamesig = NULL;
4416         empty_node = ISC_TRUE;
4417         header_prev = NULL;
4418         for (header = node->data; header != NULL; header = header_next) {
4419                 header_next = header->next;
4420                 if (header->rdh_ttl <= now) {
4421                         /*
4422                          * This rdataset is stale.  If no one else is using the
4423                          * node, we can clean it up right now, otherwise we
4424                          * mark it as stale, and the node as dirty, so it will
4425                          * get cleaned up later.
4426                          */
4427                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4428                             (locktype == isc_rwlocktype_write ||
4429                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4430                                 /*
4431                                  * We update the node's status only when we
4432                                  * can get write access.
4433                                  */
4434                                 locktype = isc_rwlocktype_write;
4435
4436                                 if (dns_rbtnode_refcurrent(node) == 0) {
4437                                         isc_mem_t *mctx;
4438
4439                                         mctx = search.rbtdb->common.mctx;
4440                                         clean_stale_headers(search.rbtdb, mctx,
4441                                                             header);
4442                                         if (header_prev != NULL)
4443                                                 header_prev->next =
4444                                                         header->next;
4445                                         else
4446                                                 node->data = header->next;
4447                                         free_rdataset(search.rbtdb, mctx,
4448                                                       header);
4449                                 } else {
4450                                         header->attributes |=
4451                                                 RDATASET_ATTR_STALE;
4452                                         node->dirty = 1;
4453                                         header_prev = header;
4454                                 }
4455                         } else
4456                                 header_prev = header;
4457                 } else if (EXISTS(header)) {
4458                         /*
4459                          * We now know that there is at least one active
4460                          * non-stale rdataset at this node.
4461                          */
4462                         empty_node = ISC_FALSE;
4463
4464                         /*
4465                          * If we found a type we were looking for, remember
4466                          * it.
4467                          */
4468                         if (header->type == type ||
4469                             (type == dns_rdatatype_any &&
4470                              RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4471                             (cname_ok && header->type ==
4472                              dns_rdatatype_cname)) {
4473                                 /*
4474                                  * We've found the answer.
4475                                  */
4476                                 found = header;
4477                                 if (header->type == dns_rdatatype_cname &&
4478                                     cname_ok &&
4479                                     cnamesig != NULL) {
4480                                         /*
4481                                          * If we've already got the CNAME RRSIG,
4482                                          * use it, otherwise change sigtype
4483                                          * so that we find it.
4484                                          */
4485                                         if (cnamesig != NULL)
4486                                                 foundsig = cnamesig;
4487                                         else
4488                                                 sigtype =
4489                                                     RBTDB_RDATATYPE_SIGCNAME;
4490                                         foundsig = cnamesig;
4491                                 }
4492                         } else if (header->type == sigtype) {
4493                                 /*
4494                                  * We've found the RRSIG rdataset for our
4495                                  * target type.  Remember it.
4496                                  */
4497                                 foundsig = header;
4498                         } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4499                                    header->type == negtype) {
4500                                 /*
4501                                  * We've found a negative cache entry.
4502                                  */
4503                                 found = header;
4504                         } else if (header->type == dns_rdatatype_ns) {
4505                                 /*
4506                                  * Remember a NS rdataset even if we're
4507                                  * not specifically looking for it, because
4508                                  * we might need it later.
4509                                  */
4510                                 nsheader = header;
4511                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4512                                 /*
4513                                  * If we need the NS rdataset, we'll also
4514                                  * need its signature.
4515                                  */
4516                                 nssig = header;
4517                         } else if (cname_ok &&
4518                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
4519                                 /*
4520                                  * If we get a CNAME match, we'll also need
4521                                  * its signature.
4522                                  */
4523                                 cnamesig = header;
4524                         }
4525                         header_prev = header;
4526                 } else
4527                         header_prev = header;
4528         }
4529
4530         if (empty_node) {
4531                 /*
4532                  * We have an exact match for the name, but there are no
4533                  * extant rdatasets.  That means that this node doesn't
4534                  * meaningfully exist, and that we really have a partial match.
4535                  */
4536                 NODE_UNLOCK(lock, locktype);
4537                 goto find_ns;
4538         }
4539
4540         /*
4541          * If we didn't find what we were looking for...
4542          */
4543         if (found == NULL ||
4544             (DNS_TRUST_ADDITIONAL(found->trust) &&
4545              ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4546             (found->trust == dns_trust_glue &&
4547              ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4548             (DNS_TRUST_PENDING(found->trust) &&
4549              ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4550                 /*
4551                  * If there is an NS rdataset at this node, then this is the
4552                  * deepest zone cut.
4553                  */
4554                 if (nsheader != NULL) {
4555                         if (nodep != NULL) {
4556                                 new_reference(search.rbtdb, node);
4557                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4558                                 *nodep = node;
4559                         }
4560                         bind_rdataset(search.rbtdb, node, nsheader, search.now,
4561                                       rdataset);
4562                         if (need_headerupdate(nsheader, search.now))
4563                                 update = nsheader;
4564                         if (nssig != NULL) {
4565                                 bind_rdataset(search.rbtdb, node, nssig,
4566                                               search.now, sigrdataset);
4567                                 if (need_headerupdate(nssig, search.now))
4568                                         updatesig = nssig;
4569                         }
4570                         result = DNS_R_DELEGATION;
4571                         goto node_exit;
4572                 }
4573
4574                 /*
4575                  * Go find the deepest zone cut.
4576                  */
4577                 NODE_UNLOCK(lock, locktype);
4578                 goto find_ns;
4579         }
4580
4581         /*
4582          * We found what we were looking for, or we found a CNAME.
4583          */
4584
4585         if (nodep != NULL) {
4586                 new_reference(search.rbtdb, node);
4587                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4588                 *nodep = node;
4589         }
4590
4591         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
4592                 /*
4593                  * We found a negative cache entry.
4594                  */
4595                 if (NXDOMAIN(found))
4596                         result = DNS_R_NCACHENXDOMAIN;
4597                 else
4598                         result = DNS_R_NCACHENXRRSET;
4599         } else if (type != found->type &&
4600                    type != dns_rdatatype_any &&
4601                    found->type == dns_rdatatype_cname) {
4602                 /*
4603                  * We weren't doing an ANY query and we found a CNAME instead
4604                  * of the type we were looking for, so we need to indicate
4605                  * that result to the caller.
4606                  */
4607                 result = DNS_R_CNAME;
4608         } else {
4609                 /*
4610                  * An ordinary successful query!
4611                  */
4612                 result = ISC_R_SUCCESS;
4613         }
4614
4615         if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
4616             result == DNS_R_NCACHENXRRSET) {
4617                 bind_rdataset(search.rbtdb, node, found, search.now,
4618                               rdataset);
4619                 if (need_headerupdate(found, search.now))
4620                         update = found;
4621                 if (foundsig != NULL) {
4622                         bind_rdataset(search.rbtdb, node, foundsig, search.now,
4623                                       sigrdataset);
4624                         if (need_headerupdate(foundsig, search.now))
4625                                 updatesig = foundsig;
4626                 }
4627         }
4628
4629  node_exit:
4630         if ((update != NULL || updatesig != NULL) &&
4631             locktype != isc_rwlocktype_write) {
4632                 NODE_UNLOCK(lock, locktype);
4633                 NODE_LOCK(lock, isc_rwlocktype_write);
4634                 locktype = isc_rwlocktype_write;
4635         }
4636         if (update != NULL && need_headerupdate(update, search.now))
4637                 update_header(search.rbtdb, update, search.now);
4638         if (updatesig != NULL && need_headerupdate(updatesig, search.now))
4639                 update_header(search.rbtdb, updatesig, search.now);
4640
4641         NODE_UNLOCK(lock, locktype);
4642
4643  tree_exit:
4644         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4645
4646         /*
4647          * If we found a zonecut but aren't going to use it, we have to
4648          * let go of it.
4649          */
4650         if (search.need_cleanup) {
4651                 node = search.zonecut;
4652                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4653
4654                 NODE_LOCK(lock, isc_rwlocktype_read);
4655                 decrement_reference(search.rbtdb, node, 0,
4656                                     isc_rwlocktype_read, isc_rwlocktype_none,
4657                                     ISC_FALSE);
4658                 NODE_UNLOCK(lock, isc_rwlocktype_read);
4659         }
4660
4661         dns_rbtnodechain_reset(&search.chain);
4662
4663         return (result);
4664 }
4665
4666 static isc_result_t
4667 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4668                   isc_stdtime_t now, dns_dbnode_t **nodep,
4669                   dns_name_t *foundname,
4670                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4671 {
4672         dns_rbtnode_t *node = NULL;
4673         nodelock_t *lock;
4674         isc_result_t result;
4675         rbtdb_search_t search;
4676         rdatasetheader_t *header, *header_prev, *header_next;
4677         rdatasetheader_t *found, *foundsig;
4678         unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
4679         isc_rwlocktype_t locktype;
4680
4681         search.rbtdb = (dns_rbtdb_t *)db;
4682
4683         REQUIRE(VALID_RBTDB(search.rbtdb));
4684
4685         if (now == 0)
4686                 isc_stdtime_get(&now);
4687
4688         search.rbtversion = NULL;
4689         search.serial = 1;
4690         search.options = options;
4691         search.copy_name = ISC_FALSE;
4692         search.need_cleanup = ISC_FALSE;
4693         search.wild = ISC_FALSE;
4694         search.zonecut = NULL;
4695         dns_fixedname_init(&search.zonecut_name);
4696         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4697         search.now = now;
4698
4699         if ((options & DNS_DBFIND_NOEXACT) != 0)
4700                 rbtoptions |= DNS_RBTFIND_NOEXACT;
4701
4702         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4703
4704         /*
4705          * Search down from the root of the tree.
4706          */
4707         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4708                                   &search.chain, rbtoptions, NULL, &search);
4709
4710         if (result == DNS_R_PARTIALMATCH) {
4711         find_ns:
4712                 result = find_deepest_zonecut(&search, node, nodep, foundname,
4713                                               rdataset, sigrdataset);
4714                 goto tree_exit;
4715         } else if (result != ISC_R_SUCCESS)
4716                 goto tree_exit;
4717
4718         /*
4719          * We now go looking for an NS rdataset at the node.
4720          */
4721
4722         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4723         locktype = isc_rwlocktype_read;
4724         NODE_LOCK(lock, locktype);
4725
4726         found = NULL;
4727         foundsig = NULL;
4728         header_prev = NULL;
4729         for (header = node->data; header != NULL; header = header_next) {
4730                 header_next = header->next;
4731                 if (header->rdh_ttl <= now) {
4732                         /*
4733                          * This rdataset is stale.  If no one else is using the
4734                          * node, we can clean it up right now, otherwise we
4735                          * mark it as stale, and the node as dirty, so it will
4736                          * get cleaned up later.
4737                          */
4738                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4739                             (locktype == isc_rwlocktype_write ||
4740                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4741                                 /*
4742                                  * We update the node's status only when we
4743                                  * can get write access.
4744                                  */
4745                                 locktype = isc_rwlocktype_write;
4746
4747                                 if (dns_rbtnode_refcurrent(node) == 0) {
4748                                         isc_mem_t *mctx;
4749
4750                                         mctx = search.rbtdb->common.mctx;
4751                                         clean_stale_headers(search.rbtdb, mctx,
4752                                                             header);
4753                                         if (header_prev != NULL)
4754                                                 header_prev->next =
4755                                                         header->next;
4756                                         else
4757                                                 node->data = header->next;
4758                                         free_rdataset(search.rbtdb, mctx,
4759                                                       header);
4760                                 } else {
4761                                         header->attributes |=
4762                                                 RDATASET_ATTR_STALE;
4763                                         node->dirty = 1;
4764                                         header_prev = header;
4765                                 }
4766                         } else
4767                                 header_prev = header;
4768                 } else if (EXISTS(header)) {
4769                         /*
4770                          * If we found a type we were looking for, remember
4771                          * it.
4772                          */
4773                         if (header->type == dns_rdatatype_ns) {
4774                                 /*
4775                                  * Remember a NS rdataset even if we're
4776                                  * not specifically looking for it, because
4777                                  * we might need it later.
4778                                  */
4779                                 found = header;
4780                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4781                                 /*
4782                                  * If we need the NS rdataset, we'll also
4783                                  * need its signature.
4784                                  */
4785                                 foundsig = header;
4786                         }
4787                         header_prev = header;
4788                 } else
4789                         header_prev = header;
4790         }
4791
4792         if (found == NULL) {
4793                 /*
4794                  * No NS records here.
4795                  */
4796                 NODE_UNLOCK(lock, locktype);
4797                 goto find_ns;
4798         }
4799
4800         if (nodep != NULL) {
4801                 new_reference(search.rbtdb, node);
4802                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4803                 *nodep = node;
4804         }
4805
4806         bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
4807         if (foundsig != NULL)
4808                 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4809                               sigrdataset);
4810
4811         if (need_headerupdate(found, search.now) ||
4812             (foundsig != NULL &&  need_headerupdate(foundsig, search.now))) {
4813                 if (locktype != isc_rwlocktype_write) {
4814                         NODE_UNLOCK(lock, locktype);
4815                         NODE_LOCK(lock, isc_rwlocktype_write);
4816                         locktype = isc_rwlocktype_write;
4817                 }
4818                 if (need_headerupdate(found, search.now))
4819                         update_header(search.rbtdb, found, search.now);
4820                 if (foundsig != NULL &&
4821                     need_headerupdate(foundsig, search.now)) {
4822                         update_header(search.rbtdb, foundsig, search.now);
4823                 }
4824         }
4825
4826         NODE_UNLOCK(lock, locktype);
4827
4828  tree_exit:
4829         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4830
4831         INSIST(!search.need_cleanup);
4832
4833         dns_rbtnodechain_reset(&search.chain);
4834
4835         if (result == DNS_R_DELEGATION)
4836                 result = ISC_R_SUCCESS;
4837
4838         return (result);
4839 }
4840
4841 static void
4842 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
4843         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4844         dns_rbtnode_t *node = (dns_rbtnode_t *)source;
4845         unsigned int refs;
4846
4847         REQUIRE(VALID_RBTDB(rbtdb));
4848         REQUIRE(targetp != NULL && *targetp == NULL);
4849
4850         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
4851         dns_rbtnode_refincrement(node, &refs);
4852         INSIST(refs != 0);
4853         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
4854
4855         *targetp = source;
4856 }
4857
4858 static void
4859 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
4860         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4861         dns_rbtnode_t *node;
4862         isc_boolean_t want_free = ISC_FALSE;
4863         isc_boolean_t inactive = ISC_FALSE;
4864         rbtdb_nodelock_t *nodelock;
4865
4866         REQUIRE(VALID_RBTDB(rbtdb));
4867         REQUIRE(targetp != NULL && *targetp != NULL);
4868
4869         node = (dns_rbtnode_t *)(*targetp);
4870         nodelock = &rbtdb->node_locks[node->locknum];
4871
4872         NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
4873
4874         if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
4875                                 isc_rwlocktype_none, ISC_FALSE)) {
4876                 if (isc_refcount_current(&nodelock->references) == 0 &&
4877                     nodelock->exiting) {
4878                         inactive = ISC_TRUE;
4879                 }
4880         }
4881
4882         NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
4883
4884         *targetp = NULL;
4885
4886         if (inactive) {
4887                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
4888                 rbtdb->active--;
4889                 if (rbtdb->active == 0)
4890                         want_free = ISC_TRUE;
4891                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
4892                 if (want_free) {
4893                         char buf[DNS_NAME_FORMATSIZE];
4894                         if (dns_name_dynamic(&rbtdb->common.origin))
4895                                 dns_name_format(&rbtdb->common.origin, buf,
4896                                                 sizeof(buf));
4897                         else
4898                                 strcpy(buf, "<UNKNOWN>");
4899                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
4900                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
4901                                       "calling free_rbtdb(%s)", buf);
4902                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
4903                 }
4904         }
4905 }
4906
4907 static isc_result_t
4908 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
4909         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4910         dns_rbtnode_t *rbtnode = node;
4911         rdatasetheader_t *header;
4912         isc_boolean_t force_expire = ISC_FALSE;
4913         /*
4914          * These are the category and module used by the cache cleaner.
4915          */
4916         isc_boolean_t log = ISC_FALSE;
4917         isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
4918         isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
4919         int level = ISC_LOG_DEBUG(2);
4920         char printname[DNS_NAME_FORMATSIZE];
4921
4922         REQUIRE(VALID_RBTDB(rbtdb));
4923
4924         /*
4925          * Caller must hold a tree lock.
4926          */
4927
4928         if (now == 0)
4929                 isc_stdtime_get(&now);
4930
4931         if (rbtdb->overmem) {
4932                 isc_uint32_t val;
4933
4934                 isc_random_get(&val);
4935                 /*
4936                  * XXXDCL Could stand to have a better policy, like LRU.
4937                  */
4938                 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
4939
4940                 /*
4941                  * Note that 'log' can be true IFF rbtdb->overmem is also true.
4942                  * rbtdb->overmem can currently only be true for cache
4943                  * databases -- hence all of the "overmem cache" log strings.
4944                  */
4945                 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
4946                 if (log)
4947                         isc_log_write(dns_lctx, category, module, level,
4948                                       "overmem cache: %s %s",
4949                                       force_expire ? "FORCE" : "check",
4950                                       dns_rbt_formatnodename(rbtnode,
4951                                                            printname,
4952                                                            sizeof(printname)));
4953         }
4954
4955         /*
4956          * We may not need write access, but this code path is not performance
4957          * sensitive, so it should be okay to always lock as a writer.
4958          */
4959         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4960                   isc_rwlocktype_write);
4961
4962         for (header = rbtnode->data; header != NULL; header = header->next)
4963                 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
4964                         /*
4965                          * We don't check if refcurrent(rbtnode) == 0 and try
4966                          * to free like we do in cache_find(), because
4967                          * refcurrent(rbtnode) must be non-zero.  This is so
4968                          * because 'node' is an argument to the function.
4969                          */
4970                         header->attributes |= RDATASET_ATTR_STALE;
4971                         rbtnode->dirty = 1;
4972                         if (log)
4973                                 isc_log_write(dns_lctx, category, module,
4974                                               level, "overmem cache: stale %s",
4975                                               printname);
4976                 } else if (force_expire) {
4977                         if (! RETAIN(header)) {
4978                                 set_ttl(rbtdb, header, 0);
4979                                 header->attributes |= RDATASET_ATTR_STALE;
4980                                 rbtnode->dirty = 1;
4981                         } else if (log) {
4982                                 isc_log_write(dns_lctx, category, module,
4983                                               level, "overmem cache: "
4984                                               "reprieve by RETAIN() %s",
4985                                               printname);
4986                         }
4987                 } else if (rbtdb->overmem && log)
4988                         isc_log_write(dns_lctx, category, module, level,
4989                                       "overmem cache: saved %s", printname);
4990
4991         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4992                     isc_rwlocktype_write);
4993
4994         return (ISC_R_SUCCESS);
4995 }
4996
4997 static void
4998 overmem(dns_db_t *db, isc_boolean_t overmem) {
4999         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5000
5001         if (IS_CACHE(rbtdb))
5002                 rbtdb->overmem = overmem;
5003 }
5004
5005 static void
5006 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5007         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5008         dns_rbtnode_t *rbtnode = node;
5009         isc_boolean_t first;
5010
5011         REQUIRE(VALID_RBTDB(rbtdb));
5012
5013         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5014                   isc_rwlocktype_read);
5015
5016         fprintf(out, "node %p, %u references, locknum = %u\n",
5017                 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5018                 rbtnode->locknum);
5019         if (rbtnode->data != NULL) {
5020                 rdatasetheader_t *current, *top_next;
5021
5022                 for (current = rbtnode->data; current != NULL;
5023                      current = top_next) {
5024                         top_next = current->next;
5025                         first = ISC_TRUE;
5026                         fprintf(out, "\ttype %u", current->type);
5027                         do {
5028                                 if (!first)
5029                                         fprintf(out, "\t");
5030                                 first = ISC_FALSE;
5031                                 fprintf(out,
5032                                         "\tserial = %lu, ttl = %u, "
5033                                         "trust = %u, attributes = %u, "
5034                                         "resign = %u\n",
5035                                         (unsigned long)current->serial,
5036                                         current->rdh_ttl,
5037                                         current->trust,
5038                                         current->attributes,
5039                                         current->resign);
5040                                 current = current->down;
5041                         } while (current != NULL);
5042                 }
5043         } else
5044                 fprintf(out, "(empty)\n");
5045
5046         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5047                     isc_rwlocktype_read);
5048 }
5049
5050 static isc_result_t
5051 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5052 {
5053         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5054         rbtdb_dbiterator_t *rbtdbiter;
5055
5056         REQUIRE(VALID_RBTDB(rbtdb));
5057
5058         rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5059         if (rbtdbiter == NULL)
5060                 return (ISC_R_NOMEMORY);
5061
5062         rbtdbiter->common.methods = &dbiterator_methods;
5063         rbtdbiter->common.db = NULL;
5064         dns_db_attach(db, &rbtdbiter->common.db);
5065         rbtdbiter->common.relative_names =
5066                         ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5067         rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5068         rbtdbiter->common.cleaning = ISC_FALSE;
5069         rbtdbiter->paused = ISC_TRUE;
5070         rbtdbiter->tree_locked = isc_rwlocktype_none;
5071         rbtdbiter->result = ISC_R_SUCCESS;
5072         dns_fixedname_init(&rbtdbiter->name);
5073         dns_fixedname_init(&rbtdbiter->origin);
5074         rbtdbiter->node = NULL;
5075         rbtdbiter->delete = 0;
5076         rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5077         rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5078         memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5079         dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5080         dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5081         if (rbtdbiter->nsec3only)
5082                 rbtdbiter->current = &rbtdbiter->nsec3chain;
5083         else
5084                 rbtdbiter->current = &rbtdbiter->chain;
5085
5086         *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5087
5088         return (ISC_R_SUCCESS);
5089 }
5090
5091 static isc_result_t
5092 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5093                   dns_rdatatype_t type, dns_rdatatype_t covers,
5094                   isc_stdtime_t now, dns_rdataset_t *rdataset,
5095                   dns_rdataset_t *sigrdataset)
5096 {
5097         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5098         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5099         rdatasetheader_t *header, *header_next, *found, *foundsig;
5100         rbtdb_serial_t serial;
5101         rbtdb_version_t *rbtversion = version;
5102         isc_boolean_t close_version = ISC_FALSE;
5103         rbtdb_rdatatype_t matchtype, sigmatchtype;
5104
5105         REQUIRE(VALID_RBTDB(rbtdb));
5106         REQUIRE(type != dns_rdatatype_any);
5107
5108         if (rbtversion == NULL) {
5109                 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5110                 close_version = ISC_TRUE;
5111         }
5112         serial = rbtversion->serial;
5113         now = 0;
5114
5115         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5116                   isc_rwlocktype_read);
5117
5118         found = NULL;
5119         foundsig = NULL;
5120         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5121         if (covers == 0)
5122                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5123         else
5124                 sigmatchtype = 0;
5125
5126         for (header = rbtnode->data; header != NULL; header = header_next) {
5127                 header_next = header->next;
5128                 do {
5129                         if (header->serial <= serial &&
5130                             !IGNORE(header)) {
5131                                 /*
5132                                  * Is this a "this rdataset doesn't
5133                                  * exist" record?
5134                                  */
5135                                 if (NONEXISTENT(header))
5136                                         header = NULL;
5137                                 break;
5138                         } else
5139                                 header = header->down;
5140                 } while (header != NULL);
5141                 if (header != NULL) {
5142                         /*
5143                          * We have an active, extant rdataset.  If it's a
5144                          * type we're looking for, remember it.
5145                          */
5146                         if (header->type == matchtype) {
5147                                 found = header;
5148                                 if (foundsig != NULL)
5149                                         break;
5150                         } else if (header->type == sigmatchtype) {
5151                                 foundsig = header;
5152                                 if (found != NULL)
5153                                         break;
5154                         }
5155                 }
5156         }
5157         if (found != NULL) {
5158                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5159                 if (foundsig != NULL)
5160                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5161                                       sigrdataset);
5162         }
5163
5164         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5165                     isc_rwlocktype_read);
5166
5167         if (close_version)
5168                 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5169                              ISC_FALSE);
5170
5171         if (found == NULL)
5172                 return (ISC_R_NOTFOUND);
5173
5174         return (ISC_R_SUCCESS);
5175 }
5176
5177 static isc_result_t
5178 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5179                    dns_rdatatype_t type, dns_rdatatype_t covers,
5180                    isc_stdtime_t now, dns_rdataset_t *rdataset,
5181                    dns_rdataset_t *sigrdataset)
5182 {
5183         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5184         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5185         rdatasetheader_t *header, *header_next, *found, *foundsig;
5186         rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5187         isc_result_t result;
5188         nodelock_t *lock;
5189         isc_rwlocktype_t locktype;
5190
5191         REQUIRE(VALID_RBTDB(rbtdb));
5192         REQUIRE(type != dns_rdatatype_any);
5193
5194         UNUSED(version);
5195
5196         result = ISC_R_SUCCESS;
5197
5198         if (now == 0)
5199                 isc_stdtime_get(&now);
5200
5201         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5202         locktype = isc_rwlocktype_read;
5203         NODE_LOCK(lock, locktype);
5204
5205         found = NULL;
5206         foundsig = NULL;
5207         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5208         negtype = RBTDB_RDATATYPE_VALUE(0, type);
5209         if (covers == 0)
5210                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5211         else
5212                 sigmatchtype = 0;
5213
5214         for (header = rbtnode->data; header != NULL; header = header_next) {
5215                 header_next = header->next;
5216                 if (header->rdh_ttl <= now) {
5217                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5218                             (locktype == isc_rwlocktype_write ||
5219                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5220                                 /*
5221                                  * We update the node's status only when we
5222                                  * can get write access.
5223                                  */
5224                                 locktype = isc_rwlocktype_write;
5225
5226                                 /*
5227                                  * We don't check if refcurrent(rbtnode) == 0
5228                                  * and try to free like we do in cache_find(),
5229                                  * because refcurrent(rbtnode) must be
5230                                  * non-zero.  This is so because 'node' is an
5231                                  * argument to the function.
5232                                  */
5233                                 header->attributes |= RDATASET_ATTR_STALE;
5234                                 rbtnode->dirty = 1;
5235                         }
5236                 } else if (EXISTS(header)) {
5237                         if (header->type == matchtype)
5238                                 found = header;
5239                         else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5240                                  header->type == negtype)
5241                                 found = header;
5242                         else if (header->type == sigmatchtype)
5243                                 foundsig = header;
5244                 }
5245         }
5246         if (found != NULL) {
5247                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5248                 if (foundsig != NULL)
5249                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5250                                       sigrdataset);
5251         }
5252
5253         NODE_UNLOCK(lock, locktype);
5254
5255         if (found == NULL)
5256                 return (ISC_R_NOTFOUND);
5257
5258         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
5259                 /*
5260                  * We found a negative cache entry.
5261                  */
5262                 if (NXDOMAIN(found))
5263                         result = DNS_R_NCACHENXDOMAIN;
5264                 else
5265                         result = DNS_R_NCACHENXRRSET;
5266         }
5267
5268         return (result);
5269 }
5270
5271 static isc_result_t
5272 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5273              isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5274 {
5275         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5276         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5277         rbtdb_version_t *rbtversion = version;
5278         rbtdb_rdatasetiter_t *iterator;
5279         unsigned int refs;
5280
5281         REQUIRE(VALID_RBTDB(rbtdb));
5282
5283         iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5284         if (iterator == NULL)
5285                 return (ISC_R_NOMEMORY);
5286
5287         if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5288                 now = 0;
5289                 if (rbtversion == NULL)
5290                         currentversion(db,
5291                                  (dns_dbversion_t **) (void *)(&rbtversion));
5292                 else {
5293                         unsigned int refs;
5294
5295                         isc_refcount_increment(&rbtversion->references,
5296                                                &refs);
5297                         INSIST(refs > 1);
5298                 }
5299         } else {
5300                 if (now == 0)
5301                         isc_stdtime_get(&now);
5302                 rbtversion = NULL;
5303         }
5304
5305         iterator->common.magic = DNS_RDATASETITER_MAGIC;
5306         iterator->common.methods = &rdatasetiter_methods;
5307         iterator->common.db = db;
5308         iterator->common.node = node;
5309         iterator->common.version = (dns_dbversion_t *)rbtversion;
5310         iterator->common.now = now;
5311
5312         NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5313
5314         dns_rbtnode_refincrement(rbtnode, &refs);
5315         INSIST(refs != 0);
5316
5317         iterator->current = NULL;
5318
5319         NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5320
5321         *iteratorp = (dns_rdatasetiter_t *)iterator;
5322
5323         return (ISC_R_SUCCESS);
5324 }
5325
5326 static isc_boolean_t
5327 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5328         rdatasetheader_t *header, *header_next;
5329         isc_boolean_t cname, other_data;
5330         dns_rdatatype_t rdtype;
5331
5332         /*
5333          * The caller must hold the node lock.
5334          */
5335
5336         /*
5337          * Look for CNAME and "other data" rdatasets active in our version.
5338          */
5339         cname = ISC_FALSE;
5340         other_data = ISC_FALSE;
5341         for (header = node->data; header != NULL; header = header_next) {
5342                 header_next = header->next;
5343                 if (header->type == dns_rdatatype_cname) {
5344                         /*
5345                          * Look for an active extant CNAME.
5346                          */
5347                         do {
5348                                 if (header->serial <= serial &&
5349                                     !IGNORE(header)) {
5350                                         /*
5351                                          * Is this a "this rdataset doesn't
5352                                          * exist" record?
5353                                          */
5354                                         if (NONEXISTENT(header))
5355                                                 header = NULL;
5356                                         break;
5357                                 } else
5358                                         header = header->down;
5359                         } while (header != NULL);
5360                         if (header != NULL)
5361                                 cname = ISC_TRUE;
5362                 } else {
5363                         /*
5364                          * Look for active extant "other data".
5365                          *
5366                          * "Other data" is any rdataset whose type is not
5367                          * KEY, NSEC, SIG or RRSIG.
5368                          */
5369                         rdtype = RBTDB_RDATATYPE_BASE(header->type);
5370                         if (rdtype != dns_rdatatype_key &&
5371                             rdtype != dns_rdatatype_sig &&
5372                             rdtype != dns_rdatatype_nsec &&
5373                             rdtype != dns_rdatatype_rrsig) {
5374                                 /*
5375                                  * Is it active and extant?
5376                                  */
5377                                 do {
5378                                         if (header->serial <= serial &&
5379                                             !IGNORE(header)) {
5380                                                 /*
5381                                                  * Is this a "this rdataset
5382                                                  * doesn't exist" record?
5383                                                  */
5384                                                 if (NONEXISTENT(header))
5385                                                         header = NULL;
5386                                                 break;
5387                                         } else
5388                                                 header = header->down;
5389                                 } while (header != NULL);
5390                                 if (header != NULL)
5391                                         other_data = ISC_TRUE;
5392                         }
5393                 }
5394         }
5395
5396         if (cname && other_data)
5397                 return (ISC_TRUE);
5398
5399         return (ISC_FALSE);
5400 }
5401
5402 static isc_result_t
5403 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5404         isc_result_t result;
5405
5406         INSIST(!IS_CACHE(rbtdb));
5407         INSIST(newheader->heap_index == 0);
5408         INSIST(!ISC_LINK_LINKED(newheader, link));
5409
5410         result = isc_heap_insert(rbtdb->heaps[idx], newheader);
5411         return (result);
5412 }
5413
5414 static isc_result_t
5415 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5416     rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5417     dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5418 {
5419         rbtdb_changed_t *changed = NULL;
5420         rdatasetheader_t *topheader, *topheader_prev, *header;
5421         unsigned char *merged;
5422         isc_result_t result;
5423         isc_boolean_t header_nx;
5424         isc_boolean_t newheader_nx;
5425         isc_boolean_t merge;
5426         dns_rdatatype_t rdtype, covers;
5427         rbtdb_rdatatype_t negtype;
5428         dns_trust_t trust;
5429         int idx;
5430
5431         /*
5432          * Add an rdatasetheader_t to a node.
5433          */
5434
5435         /*
5436          * Caller must be holding the node lock.
5437          */
5438
5439         if ((options & DNS_DBADD_MERGE) != 0) {
5440                 REQUIRE(rbtversion != NULL);
5441                 merge = ISC_TRUE;
5442         } else
5443                 merge = ISC_FALSE;
5444
5445         if ((options & DNS_DBADD_FORCE) != 0)
5446                 trust = dns_trust_ultimate;
5447         else
5448                 trust = newheader->trust;
5449
5450         if (rbtversion != NULL && !loading) {
5451                 /*
5452                  * We always add a changed record, even if no changes end up
5453                  * being made to this node, because it's harmless and
5454                  * simplifies the code.
5455                  */
5456                 changed = add_changed(rbtdb, rbtversion, rbtnode);
5457                 if (changed == NULL) {
5458                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5459                         return (ISC_R_NOMEMORY);
5460                 }
5461         }
5462
5463         newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5464         topheader_prev = NULL;
5465
5466         negtype = 0;
5467         if (rbtversion == NULL && !newheader_nx) {
5468                 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5469                 if (rdtype == 0) {
5470                         /*
5471                          * We're adding a negative cache entry.
5472                          */
5473                         covers = RBTDB_RDATATYPE_EXT(newheader->type);
5474                         if (covers == dns_rdatatype_any) {
5475                                 /*
5476                                  * We're adding an negative cache entry
5477                                  * which covers all types (NXDOMAIN,
5478                                  * NODATA(QTYPE=ANY)).
5479                                  *
5480                                  * We make all other data stale so that the
5481                                  * only rdataset that can be found at this
5482                                  * node is the negative cache entry.
5483                                  */
5484                                 for (topheader = rbtnode->data;
5485                                      topheader != NULL;
5486                                      topheader = topheader->next) {
5487                                         set_ttl(rbtdb, topheader, 0);
5488                                         topheader->attributes |=
5489                                                 RDATASET_ATTR_STALE;
5490                                 }
5491                                 rbtnode->dirty = 1;
5492                                 goto find_header;
5493                         }
5494                         negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5495                 } else {
5496                         /*
5497                          * We're adding something that isn't a
5498                          * negative cache entry.  Look for an extant
5499                          * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5500                          * cache entry.
5501                          */
5502                         for (topheader = rbtnode->data;
5503                              topheader != NULL;
5504                              topheader = topheader->next) {
5505                                 if (topheader->type ==
5506                                     RBTDB_RDATATYPE_NCACHEANY)
5507                                         break;
5508                         }
5509                         if (topheader != NULL && EXISTS(topheader) &&
5510                             topheader->rdh_ttl > now) {
5511                                 /*
5512                                  * Found one.
5513                                  */
5514                                 if (trust < topheader->trust) {
5515                                         /*
5516                                          * The NXDOMAIN/NODATA(QTYPE=ANY)
5517                                          * is more trusted.
5518                                          */
5519                                         free_rdataset(rbtdb,
5520                                                       rbtdb->common.mctx,
5521                                                       newheader);
5522                                         if (addedrdataset != NULL)
5523                                                 bind_rdataset(rbtdb, rbtnode,
5524                                                               topheader, now,
5525                                                               addedrdataset);
5526                                         return (DNS_R_UNCHANGED);
5527                                 }
5528                                 /*
5529                                  * The new rdataset is better.  Expire the
5530                                  * NXDOMAIN/NODATA(QTYPE=ANY).
5531                                  */
5532                                 set_ttl(rbtdb, topheader, 0);
5533                                 topheader->attributes |= RDATASET_ATTR_STALE;
5534                                 rbtnode->dirty = 1;
5535                                 topheader = NULL;
5536                                 goto find_header;
5537                         }
5538                         negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5539                 }
5540         }
5541
5542         for (topheader = rbtnode->data;
5543              topheader != NULL;
5544              topheader = topheader->next) {
5545                 if (topheader->type == newheader->type ||
5546                     topheader->type == negtype)
5547                         break;
5548                 topheader_prev = topheader;
5549         }
5550
5551  find_header:
5552         /*
5553          * If header isn't NULL, we've found the right type.  There may be
5554          * IGNORE rdatasets between the top of the chain and the first real
5555          * data.  We skip over them.
5556          */
5557         header = topheader;
5558         while (header != NULL && IGNORE(header))
5559                 header = header->down;
5560         if (header != NULL) {
5561                 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
5562
5563                 /*
5564                  * Deleting an already non-existent rdataset has no effect.
5565                  */
5566                 if (header_nx && newheader_nx) {
5567                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5568                         return (DNS_R_UNCHANGED);
5569                 }
5570
5571                 /*
5572                  * Trying to add an rdataset with lower trust to a cache DB
5573                  * has no effect, provided that the cache data isn't stale.
5574                  */
5575                 if (rbtversion == NULL && trust < header->trust &&
5576                     (header->rdh_ttl > now || header_nx)) {
5577                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5578                         if (addedrdataset != NULL)
5579                                 bind_rdataset(rbtdb, rbtnode, header, now,
5580                                               addedrdataset);
5581                         return (DNS_R_UNCHANGED);
5582                 }
5583
5584                 /*
5585                  * Don't merge if a nonexistent rdataset is involved.
5586                  */
5587                 if (merge && (header_nx || newheader_nx))
5588                         merge = ISC_FALSE;
5589
5590                 /*
5591                  * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
5592                  * that is the union of 'newheader' and 'header'.
5593                  */
5594                 if (merge) {
5595                         unsigned int flags = 0;
5596                         INSIST(rbtversion->serial >= header->serial);
5597                         merged = NULL;
5598                         result = ISC_R_SUCCESS;
5599
5600                         if ((options & DNS_DBADD_EXACT) != 0)
5601                                 flags |= DNS_RDATASLAB_EXACT;
5602                         if ((options & DNS_DBADD_EXACTTTL) != 0 &&
5603                              newheader->rdh_ttl != header->rdh_ttl)
5604                                         result = DNS_R_NOTEXACT;
5605                         else if (newheader->rdh_ttl != header->rdh_ttl)
5606                                 flags |= DNS_RDATASLAB_FORCE;
5607                         if (result == ISC_R_SUCCESS)
5608                                 result = dns_rdataslab_merge(
5609                                              (unsigned char *)header,
5610                                              (unsigned char *)newheader,
5611                                              (unsigned int)(sizeof(*newheader)),
5612                                              rbtdb->common.mctx,
5613                                              rbtdb->common.rdclass,
5614                                              (dns_rdatatype_t)header->type,
5615                                              flags, &merged);
5616                         if (result == ISC_R_SUCCESS) {
5617                                 /*
5618                                  * If 'header' has the same serial number as
5619                                  * we do, we could clean it up now if we knew
5620                                  * that our caller had no references to it.
5621                                  * We don't know this, however, so we leave it
5622                                  * alone.  It will get cleaned up when
5623                                  * clean_zone_node() runs.
5624                                  */
5625                                 free_rdataset(rbtdb, rbtdb->common.mctx,
5626                                               newheader);
5627                                 newheader = (rdatasetheader_t *)merged;
5628                                 if (loading && RESIGN(newheader) &&
5629                                     RESIGN(header) &&
5630                                     header->resign < newheader->resign)
5631                                         newheader->resign = header->resign;
5632                         } else {
5633                                 free_rdataset(rbtdb, rbtdb->common.mctx,
5634                                               newheader);
5635                                 return (result);
5636                         }
5637                 }
5638                 /*
5639                  * Don't replace existing NS, A and AAAA RRsets
5640                  * in the cache if they are already exist.  This
5641                  * prevents named being locked to old servers.
5642                  * Don't lower trust of existing record if the
5643                  * update is forced.
5644                  */
5645                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5646                     header->type == dns_rdatatype_ns &&
5647                     !header_nx && !newheader_nx &&
5648                     header->trust >= newheader->trust &&
5649                     dns_rdataslab_equalx((unsigned char *)header,
5650                                          (unsigned char *)newheader,
5651                                          (unsigned int)(sizeof(*newheader)),
5652                                          rbtdb->common.rdclass,
5653                                          (dns_rdatatype_t)header->type)) {
5654                         /*
5655                          * Honour the new ttl if it is less than the
5656                          * older one.
5657                          */
5658                         if (header->rdh_ttl > newheader->rdh_ttl)
5659                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
5660                         if (header->noqname == NULL &&
5661                             newheader->noqname != NULL) {
5662                                 header->noqname = newheader->noqname;
5663                                 newheader->noqname = NULL;
5664                         }
5665                         if (header->closest == NULL &&
5666                             newheader->closest != NULL) {
5667                                 header->closest = newheader->closest;
5668                                 newheader->closest = NULL;
5669                         }
5670                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5671                         if (addedrdataset != NULL)
5672                                 bind_rdataset(rbtdb, rbtnode, header, now,
5673                                               addedrdataset);
5674                         return (ISC_R_SUCCESS);
5675                 }
5676                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5677                     (header->type == dns_rdatatype_a ||
5678                      header->type == dns_rdatatype_aaaa) &&
5679                     !header_nx && !newheader_nx &&
5680                     header->trust >= newheader->trust &&
5681                     dns_rdataslab_equal((unsigned char *)header,
5682                                         (unsigned char *)newheader,
5683                                         (unsigned int)(sizeof(*newheader)))) {
5684                         /*
5685                          * Honour the new ttl if it is less than the
5686                          * older one.
5687                          */
5688                         if (header->rdh_ttl > newheader->rdh_ttl)
5689                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
5690                         if (header->noqname == NULL &&
5691                             newheader->noqname != NULL) {
5692                                 header->noqname = newheader->noqname;
5693                                 newheader->noqname = NULL;
5694                         }
5695                         if (header->closest == NULL &&
5696                             newheader->closest != NULL) {
5697                                 header->closest = newheader->closest;
5698                                 newheader->closest = NULL;
5699                         }
5700                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5701                         if (addedrdataset != NULL)
5702                                 bind_rdataset(rbtdb, rbtnode, header, now,
5703                                               addedrdataset);
5704                         return (ISC_R_SUCCESS);
5705                 }
5706                 INSIST(rbtversion == NULL ||
5707                        rbtversion->serial >= topheader->serial);
5708                 if (topheader_prev != NULL)
5709                         topheader_prev->next = newheader;
5710                 else
5711                         rbtnode->data = newheader;
5712                 newheader->next = topheader->next;
5713                 if (loading) {
5714                         /*
5715                          * There are no other references to 'header' when
5716                          * loading, so we MAY clean up 'header' now.
5717                          * Since we don't generate changed records when
5718                          * loading, we MUST clean up 'header' now.
5719                          */
5720                         newheader->down = NULL;
5721                         free_rdataset(rbtdb, rbtdb->common.mctx, header);
5722                 } else {
5723                         newheader->down = topheader;
5724                         topheader->next = newheader;
5725                         rbtnode->dirty = 1;
5726                         if (changed != NULL)
5727                                 changed->dirty = ISC_TRUE;
5728                         if (rbtversion == NULL) {
5729                                 set_ttl(rbtdb, header, 0);
5730                                 header->attributes |= RDATASET_ATTR_STALE;
5731                         }
5732                         idx = newheader->node->locknum;
5733                         if (IS_CACHE(rbtdb)) {
5734                                 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5735                                                  newheader, link);
5736                                 /*
5737                                  * XXXMLG We don't check the return value
5738                                  * here.  If it fails, we will not do TTL
5739                                  * based expiry on this node.  However, we
5740                                  * will do it on the LRU side, so memory
5741                                  * will not leak... for long.
5742                                  */
5743                                 isc_heap_insert(rbtdb->heaps[idx], newheader);
5744                         } else if (RESIGN(newheader))
5745                                 resign_insert(rbtdb, idx, newheader);
5746                 }
5747         } else {
5748                 /*
5749                  * No non-IGNORED rdatasets of the given type exist at
5750                  * this node.
5751                  */
5752
5753                 /*
5754                  * If we're trying to delete the type, don't bother.
5755                  */
5756                 if (newheader_nx) {
5757                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5758                         return (DNS_R_UNCHANGED);
5759                 }
5760
5761                 if (topheader != NULL) {
5762                         /*
5763                          * We have an list of rdatasets of the given type,
5764                          * but they're all marked IGNORE.  We simply insert
5765                          * the new rdataset at the head of the list.
5766                          *
5767                          * Ignored rdatasets cannot occur during loading, so
5768                          * we INSIST on it.
5769                          */
5770                         INSIST(!loading);
5771                         INSIST(rbtversion == NULL ||
5772                                rbtversion->serial >= topheader->serial);
5773                         if (topheader_prev != NULL)
5774                                 topheader_prev->next = newheader;
5775                         else
5776                                 rbtnode->data = newheader;
5777                         newheader->next = topheader->next;
5778                         newheader->down = topheader;
5779                         topheader->next = newheader;
5780                         rbtnode->dirty = 1;
5781                         if (changed != NULL)
5782                                 changed->dirty = ISC_TRUE;
5783                 } else {
5784                         /*
5785                          * No rdatasets of the given type exist at the node.
5786                          */
5787                         newheader->next = rbtnode->data;
5788                         newheader->down = NULL;
5789                         rbtnode->data = newheader;
5790                 }
5791                 idx = newheader->node->locknum;
5792                 if (IS_CACHE(rbtdb)) {
5793                         ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5794                                          newheader, link);
5795                         isc_heap_insert(rbtdb->heaps[idx], newheader);
5796                 } else if (RESIGN(newheader)) {
5797                         resign_insert(rbtdb, idx, newheader);
5798                 }
5799         }
5800
5801         /*
5802          * Check if the node now contains CNAME and other data.
5803          */
5804         if (rbtversion != NULL &&
5805             cname_and_other_data(rbtnode, rbtversion->serial))
5806                 return (DNS_R_CNAMEANDOTHER);
5807
5808         if (addedrdataset != NULL)
5809                 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
5810
5811         return (ISC_R_SUCCESS);
5812 }
5813
5814 static inline isc_boolean_t
5815 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
5816                 rbtdb_rdatatype_t type)
5817 {
5818         if (IS_CACHE(rbtdb)) {
5819                 if (type == dns_rdatatype_dname)
5820                         return (ISC_TRUE);
5821                 else
5822                         return (ISC_FALSE);
5823         } else if (type == dns_rdatatype_dname ||
5824                    (type == dns_rdatatype_ns &&
5825                     (node != rbtdb->origin_node || IS_STUB(rbtdb))))
5826                 return (ISC_TRUE);
5827         return (ISC_FALSE);
5828 }
5829
5830 static inline isc_result_t
5831 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5832            dns_rdataset_t *rdataset)
5833 {
5834         struct noqname *noqname;
5835         isc_mem_t *mctx = rbtdb->common.mctx;
5836         dns_name_t name;
5837         dns_rdataset_t neg, negsig;
5838         isc_result_t result;
5839         isc_region_t r;
5840
5841         dns_name_init(&name, NULL);
5842         dns_rdataset_init(&neg);
5843         dns_rdataset_init(&negsig);
5844
5845         result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
5846         RUNTIME_CHECK(result == ISC_R_SUCCESS);
5847
5848         noqname = isc_mem_get(mctx, sizeof(*noqname));
5849         if (noqname == NULL) {
5850                 result = ISC_R_NOMEMORY;
5851                 goto cleanup;
5852         }
5853         dns_name_init(&noqname->name, NULL);
5854         noqname->neg = NULL;
5855         noqname->negsig = NULL;
5856         noqname->type = neg.type;
5857         result = dns_name_dup(&name, mctx, &noqname->name);
5858         if (result != ISC_R_SUCCESS)
5859                 goto cleanup;
5860         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5861         if (result != ISC_R_SUCCESS)
5862                 goto cleanup;
5863         noqname->neg = r.base;
5864         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5865         if (result != ISC_R_SUCCESS)
5866                 goto cleanup;
5867         noqname->negsig = r.base;
5868         dns_rdataset_disassociate(&neg);
5869         dns_rdataset_disassociate(&negsig);
5870         newheader->noqname = noqname;
5871         return (ISC_R_SUCCESS);
5872
5873 cleanup:
5874         dns_rdataset_disassociate(&neg);
5875         dns_rdataset_disassociate(&negsig);
5876         free_noqname(mctx, &noqname);
5877         return(result);
5878 }
5879
5880 static inline isc_result_t
5881 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5882            dns_rdataset_t *rdataset)
5883 {
5884         struct noqname *closest;
5885         isc_mem_t *mctx = rbtdb->common.mctx;
5886         dns_name_t name;
5887         dns_rdataset_t neg, negsig;
5888         isc_result_t result;
5889         isc_region_t r;
5890
5891         dns_name_init(&name, NULL);
5892         dns_rdataset_init(&neg);
5893         dns_rdataset_init(&negsig);
5894
5895         result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
5896         RUNTIME_CHECK(result == ISC_R_SUCCESS);
5897
5898         closest = isc_mem_get(mctx, sizeof(*closest));
5899         if (closest == NULL) {
5900                 result = ISC_R_NOMEMORY;
5901                 goto cleanup;
5902         }
5903         dns_name_init(&closest->name, NULL);
5904         closest->neg = NULL;
5905         closest->negsig = NULL;
5906         closest->type = neg.type;
5907         result = dns_name_dup(&name, mctx, &closest->name);
5908         if (result != ISC_R_SUCCESS)
5909                 goto cleanup;
5910         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5911         if (result != ISC_R_SUCCESS)
5912                 goto cleanup;
5913         closest->neg = r.base;
5914         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5915         if (result != ISC_R_SUCCESS)
5916                 goto cleanup;
5917         closest->negsig = r.base;
5918         dns_rdataset_disassociate(&neg);
5919         dns_rdataset_disassociate(&negsig);
5920         newheader->closest = closest;
5921         return (ISC_R_SUCCESS);
5922
5923  cleanup:
5924         dns_rdataset_disassociate(&neg);
5925         dns_rdataset_disassociate(&negsig);
5926         free_noqname(mctx, &closest);
5927         return(result);
5928 }
5929
5930 static dns_dbmethods_t zone_methods;
5931
5932 static isc_result_t
5933 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5934             isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
5935             dns_rdataset_t *addedrdataset)
5936 {
5937         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5938         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5939         rbtdb_version_t *rbtversion = version;
5940         isc_region_t region;
5941         rdatasetheader_t *newheader;
5942         rdatasetheader_t *header;
5943         isc_result_t result;
5944         isc_boolean_t delegating;
5945         isc_boolean_t tree_locked = ISC_FALSE;
5946
5947         REQUIRE(VALID_RBTDB(rbtdb));
5948
5949         if (rbtdb->common.methods == &zone_methods)
5950                 REQUIRE(((rbtnode->nsec3 &&
5951                           (rdataset->type == dns_rdatatype_nsec3 ||
5952                            rdataset->covers == dns_rdatatype_nsec3)) ||
5953                          (!rbtnode->nsec3 &&
5954                            rdataset->type != dns_rdatatype_nsec3 &&
5955                            rdataset->covers != dns_rdatatype_nsec3)));
5956
5957         if (rbtversion == NULL) {
5958                 if (now == 0)
5959                         isc_stdtime_get(&now);
5960         } else
5961                 now = 0;
5962
5963         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
5964                                             &region,
5965                                             sizeof(rdatasetheader_t));
5966         if (result != ISC_R_SUCCESS)
5967                 return (result);
5968
5969         newheader = (rdatasetheader_t *)region.base;
5970         init_rdataset(rbtdb, newheader);
5971         set_ttl(rbtdb, newheader, rdataset->ttl + now);
5972         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
5973                                                 rdataset->covers);
5974         newheader->attributes = 0;
5975         newheader->noqname = NULL;
5976         newheader->closest = NULL;
5977         newheader->count = init_count++;
5978         newheader->trust = rdataset->trust;
5979         newheader->additional_auth = NULL;
5980         newheader->additional_glue = NULL;
5981         newheader->last_used = now;
5982         newheader->node = rbtnode;
5983         if (rbtversion != NULL) {
5984                 newheader->serial = rbtversion->serial;
5985                 now = 0;
5986
5987                 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
5988                         newheader->attributes |= RDATASET_ATTR_RESIGN;
5989                         newheader->resign = rdataset->resign;
5990                 } else
5991                         newheader->resign = 0;
5992         } else {
5993                 newheader->serial = 1;
5994                 newheader->resign = 0;
5995                 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
5996                         newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
5997                 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
5998                         newheader->attributes |= RDATASET_ATTR_OPTOUT;
5999                 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6000                         result = addnoqname(rbtdb, newheader, rdataset);
6001                         if (result != ISC_R_SUCCESS) {
6002                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6003                                               newheader);
6004                                 return (result);
6005                         }
6006                 }
6007                 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6008                         result = addclosest(rbtdb, newheader, rdataset);
6009                         if (result != ISC_R_SUCCESS) {
6010                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6011                                               newheader);
6012                                 return (result);
6013                         }
6014                 }
6015         }
6016
6017         /*
6018          * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6019          * just DNAME for the cache), then we need to set the callback bit
6020          * on the node.
6021          */
6022         if (delegating_type(rbtdb, rbtnode, rdataset->type))
6023                 delegating = ISC_TRUE;
6024         else
6025                 delegating = ISC_FALSE;
6026
6027         /*
6028          * If we're adding a delegation type or the DB is a cache in an overmem
6029          * state, hold an exclusive lock on the tree.  In the latter case
6030          * the lock does not necessarily have to be acquired but it will help
6031          * purge stale entries more effectively.
6032          */
6033         if (delegating || (IS_CACHE(rbtdb) && rbtdb->overmem)) {
6034                 tree_locked = ISC_TRUE;
6035                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6036         }
6037
6038         if (IS_CACHE(rbtdb) && rbtdb->overmem)
6039                 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6040
6041         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6042                   isc_rwlocktype_write);
6043
6044         if (rbtdb->rrsetstats != NULL) {
6045                 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6046                 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6047         }
6048
6049         if (IS_CACHE(rbtdb)) {
6050                 if (tree_locked)
6051                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6052
6053                 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6054                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6055                         expire_header(rbtdb, header, tree_locked);
6056
6057                 /*
6058                  * If we've been holding a write lock on the tree just for
6059                  * cleaning, we can release it now.  However, we still need the
6060                  * node lock.
6061                  */
6062                 if (tree_locked && !delegating) {
6063                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6064                         tree_locked = ISC_FALSE;
6065                 }
6066         }
6067
6068         result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
6069                      addedrdataset, now);
6070         if (result == ISC_R_SUCCESS && delegating)
6071                 rbtnode->find_callback = 1;
6072
6073         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6074                     isc_rwlocktype_write);
6075
6076         if (tree_locked)
6077                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6078
6079         /*
6080          * Update the zone's secure status.  If version is non-NULL
6081          * this is deferred until closeversion() is called.
6082          */
6083         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6084                 iszonesecure(db, version, rbtdb->origin_node);
6085
6086         return (result);
6087 }
6088
6089 static isc_result_t
6090 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6091                  dns_rdataset_t *rdataset, unsigned int options,
6092                  dns_rdataset_t *newrdataset)
6093 {
6094         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6095         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6096         rbtdb_version_t *rbtversion = version;
6097         rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6098         unsigned char *subresult;
6099         isc_region_t region;
6100         isc_result_t result;
6101         rbtdb_changed_t *changed;
6102
6103         REQUIRE(VALID_RBTDB(rbtdb));
6104
6105         if (rbtdb->common.methods == &zone_methods)
6106                 REQUIRE(((rbtnode->nsec3 &&
6107                           (rdataset->type == dns_rdatatype_nsec3 ||
6108                            rdataset->covers == dns_rdatatype_nsec3)) ||
6109                          (!rbtnode->nsec3 &&
6110                            rdataset->type != dns_rdatatype_nsec3 &&
6111                            rdataset->covers != dns_rdatatype_nsec3)));
6112
6113         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6114                                             &region,
6115                                             sizeof(rdatasetheader_t));
6116         if (result != ISC_R_SUCCESS)
6117                 return (result);
6118         newheader = (rdatasetheader_t *)region.base;
6119         init_rdataset(rbtdb, newheader);
6120         set_ttl(rbtdb, newheader, rdataset->ttl);
6121         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6122                                                 rdataset->covers);
6123         newheader->attributes = 0;
6124         newheader->serial = rbtversion->serial;
6125         newheader->trust = 0;
6126         newheader->noqname = NULL;
6127         newheader->closest = NULL;
6128         newheader->count = init_count++;
6129         newheader->additional_auth = NULL;
6130         newheader->additional_glue = NULL;
6131         newheader->last_used = 0;
6132         newheader->node = rbtnode;
6133         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6134                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6135                 newheader->resign = rdataset->resign;
6136         } else
6137                 newheader->resign = 0;
6138
6139         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6140                   isc_rwlocktype_write);
6141
6142         changed = add_changed(rbtdb, rbtversion, rbtnode);
6143         if (changed == NULL) {
6144                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6145                 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6146                             isc_rwlocktype_write);
6147                 return (ISC_R_NOMEMORY);
6148         }
6149
6150         topheader_prev = NULL;
6151         for (topheader = rbtnode->data;
6152              topheader != NULL;
6153              topheader = topheader->next) {
6154                 if (topheader->type == newheader->type)
6155                         break;
6156                 topheader_prev = topheader;
6157         }
6158         /*
6159          * If header isn't NULL, we've found the right type.  There may be
6160          * IGNORE rdatasets between the top of the chain and the first real
6161          * data.  We skip over them.
6162          */
6163         header = topheader;
6164         while (header != NULL && IGNORE(header))
6165                 header = header->down;
6166         if (header != NULL && EXISTS(header)) {
6167                 unsigned int flags = 0;
6168                 subresult = NULL;
6169                 result = ISC_R_SUCCESS;
6170                 if ((options & DNS_DBSUB_EXACT) != 0) {
6171                         flags |= DNS_RDATASLAB_EXACT;
6172                         if (newheader->rdh_ttl != header->rdh_ttl)
6173                                 result = DNS_R_NOTEXACT;
6174                 }
6175                 if (result == ISC_R_SUCCESS)
6176                         result = dns_rdataslab_subtract(
6177                                         (unsigned char *)header,
6178                                         (unsigned char *)newheader,
6179                                         (unsigned int)(sizeof(*newheader)),
6180                                         rbtdb->common.mctx,
6181                                         rbtdb->common.rdclass,
6182                                         (dns_rdatatype_t)header->type,
6183                                         flags, &subresult);
6184                 if (result == ISC_R_SUCCESS) {
6185                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6186                         newheader = (rdatasetheader_t *)subresult;
6187                         init_rdataset(rbtdb, newheader);
6188                         /*
6189                          * We have to set the serial since the rdataslab
6190                          * subtraction routine copies the reserved portion of
6191                          * header, not newheader.
6192                          */
6193                         newheader->serial = rbtversion->serial;
6194                         /*
6195                          * XXXJT: dns_rdataslab_subtract() copied the pointers
6196                          * to additional info.  We need to clear these fields
6197                          * to avoid having duplicated references.
6198                          */
6199                         newheader->additional_auth = NULL;
6200                         newheader->additional_glue = NULL;
6201                 } else if (result == DNS_R_NXRRSET) {
6202                         /*
6203                          * This subtraction would remove all of the rdata;
6204                          * add a nonexistent header instead.
6205                          */
6206                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6207                         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6208                         if (newheader == NULL) {
6209                                 result = ISC_R_NOMEMORY;
6210                                 goto unlock;
6211                         }
6212                         set_ttl(rbtdb, newheader, 0);
6213                         newheader->type = topheader->type;
6214                         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6215                         newheader->trust = 0;
6216                         newheader->serial = rbtversion->serial;
6217                         newheader->noqname = NULL;
6218                         newheader->closest = NULL;
6219                         newheader->count = 0;
6220                         newheader->additional_auth = NULL;
6221                         newheader->additional_glue = NULL;
6222                         newheader->node = rbtnode;
6223                         newheader->resign = 0;
6224                         newheader->last_used = 0;
6225                 } else {
6226                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6227                         goto unlock;
6228                 }
6229
6230                 /*
6231                  * If we're here, we want to link newheader in front of
6232                  * topheader.
6233                  */
6234                 INSIST(rbtversion->serial >= topheader->serial);
6235                 if (topheader_prev != NULL)
6236                         topheader_prev->next = newheader;
6237                 else
6238                         rbtnode->data = newheader;
6239                 newheader->next = topheader->next;
6240                 newheader->down = topheader;
6241                 topheader->next = newheader;
6242                 rbtnode->dirty = 1;
6243                 changed->dirty = ISC_TRUE;
6244         } else {
6245                 /*
6246                  * The rdataset doesn't exist, so we don't need to do anything
6247                  * to satisfy the deletion request.
6248                  */
6249                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6250                 if ((options & DNS_DBSUB_EXACT) != 0)
6251                         result = DNS_R_NOTEXACT;
6252                 else
6253                         result = DNS_R_UNCHANGED;
6254         }
6255
6256         if (result == ISC_R_SUCCESS && newrdataset != NULL)
6257                 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6258
6259  unlock:
6260         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6261                     isc_rwlocktype_write);
6262
6263         /*
6264          * Update the zone's secure status.  If version is non-NULL
6265          * this is deferred until closeversion() is called.
6266          */
6267         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6268                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6269
6270         return (result);
6271 }
6272
6273 static isc_result_t
6274 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6275                dns_rdatatype_t type, dns_rdatatype_t covers)
6276 {
6277         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6278         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6279         rbtdb_version_t *rbtversion = version;
6280         isc_result_t result;
6281         rdatasetheader_t *newheader;
6282
6283         REQUIRE(VALID_RBTDB(rbtdb));
6284
6285         if (type == dns_rdatatype_any)
6286                 return (ISC_R_NOTIMPLEMENTED);
6287         if (type == dns_rdatatype_rrsig && covers == 0)
6288                 return (ISC_R_NOTIMPLEMENTED);
6289
6290         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6291         if (newheader == NULL)
6292                 return (ISC_R_NOMEMORY);
6293         set_ttl(rbtdb, newheader, 0);
6294         newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6295         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6296         newheader->trust = 0;
6297         newheader->noqname = NULL;
6298         newheader->closest = NULL;
6299         newheader->additional_auth = NULL;
6300         newheader->additional_glue = NULL;
6301         if (rbtversion != NULL)
6302                 newheader->serial = rbtversion->serial;
6303         else
6304                 newheader->serial = 0;
6305         newheader->count = 0;
6306         newheader->last_used = 0;
6307         newheader->node = rbtnode;
6308
6309         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6310                   isc_rwlocktype_write);
6311
6312         result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6313                      ISC_FALSE, NULL, 0);
6314
6315         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6316                     isc_rwlocktype_write);
6317
6318         /*
6319          * Update the zone's secure status.  If version is non-NULL
6320          * this is deferred until closeversion() is called.
6321          */
6322         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6323                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6324
6325         return (result);
6326 }
6327
6328 static isc_result_t
6329 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6330         rbtdb_load_t *loadctx = arg;
6331         dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6332         dns_rbtnode_t *node;
6333         isc_result_t result;
6334         isc_region_t region;
6335         rdatasetheader_t *newheader;
6336
6337         /*
6338          * This routine does no node locking.  See comments in
6339          * 'load' below for more information on loading and
6340          * locking.
6341          */
6342
6343
6344         /*
6345          * SOA records are only allowed at top of zone.
6346          */
6347         if (rdataset->type == dns_rdatatype_soa &&
6348             !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6349                 return (DNS_R_NOTZONETOP);
6350
6351         if (rdataset->type != dns_rdatatype_nsec3 &&
6352             rdataset->covers != dns_rdatatype_nsec3)
6353                 add_empty_wildcards(rbtdb, name);
6354
6355         if (dns_name_iswildcard(name)) {
6356                 /*
6357                  * NS record owners cannot legally be wild cards.
6358                  */
6359                 if (rdataset->type == dns_rdatatype_ns)
6360                         return (DNS_R_INVALIDNS);
6361                 /*
6362                  * NSEC3 record owners cannot legally be wild cards.
6363                  */
6364                 if (rdataset->type == dns_rdatatype_nsec3)
6365                         return (DNS_R_INVALIDNSEC3);
6366                 result = add_wildcard_magic(rbtdb, name);
6367                 if (result != ISC_R_SUCCESS)
6368                         return (result);
6369         }
6370
6371         node = NULL;
6372         if (rdataset->type == dns_rdatatype_nsec3 ||
6373             rdataset->covers == dns_rdatatype_nsec3) {
6374                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6375                 if (result == ISC_R_SUCCESS)
6376                         node->nsec3 = 1;
6377         } else {
6378                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
6379                 if (result == ISC_R_SUCCESS)
6380                         node->nsec3 = 0;
6381         }
6382         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
6383                 return (result);
6384         if (result != ISC_R_EXISTS) {
6385                 dns_name_t foundname;
6386                 dns_name_init(&foundname, NULL);
6387                 dns_rbt_namefromnode(node, &foundname);
6388 #ifdef DNS_RBT_USEHASH
6389                 node->locknum = node->hashval % rbtdb->node_lock_count;
6390 #else
6391                 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6392                         rbtdb->node_lock_count;
6393 #endif
6394         }
6395
6396         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6397                                             &region,
6398                                             sizeof(rdatasetheader_t));
6399         if (result != ISC_R_SUCCESS)
6400                 return (result);
6401         newheader = (rdatasetheader_t *)region.base;
6402         init_rdataset(rbtdb, newheader);
6403         set_ttl(rbtdb, newheader,
6404                 rdataset->ttl + loadctx->now); /* XXX overflow check */
6405         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6406                                                 rdataset->covers);
6407         newheader->attributes = 0;
6408         newheader->trust = rdataset->trust;
6409         newheader->serial = 1;
6410         newheader->noqname = NULL;
6411         newheader->closest = NULL;
6412         newheader->count = init_count++;
6413         newheader->additional_auth = NULL;
6414         newheader->additional_glue = NULL;
6415         newheader->last_used = 0;
6416         newheader->node = node;
6417         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6418                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6419                 newheader->resign = rdataset->resign;
6420         } else
6421                 newheader->resign = 0;
6422
6423         result = add(rbtdb, node, rbtdb->current_version, newheader,
6424                      DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
6425         if (result == ISC_R_SUCCESS &&
6426             delegating_type(rbtdb, node, rdataset->type))
6427                 node->find_callback = 1;
6428         else if (result == DNS_R_UNCHANGED)
6429                 result = ISC_R_SUCCESS;
6430
6431         return (result);
6432 }
6433
6434 static isc_result_t
6435 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6436         rbtdb_load_t *loadctx;
6437         dns_rbtdb_t *rbtdb;
6438
6439         rbtdb = (dns_rbtdb_t *)db;
6440
6441         REQUIRE(VALID_RBTDB(rbtdb));
6442
6443         loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
6444         if (loadctx == NULL)
6445                 return (ISC_R_NOMEMORY);
6446
6447         loadctx->rbtdb = rbtdb;
6448         if (IS_CACHE(rbtdb))
6449                 isc_stdtime_get(&loadctx->now);
6450         else
6451                 loadctx->now = 0;
6452
6453         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6454
6455         REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
6456                 == 0);
6457         rbtdb->attributes |= RBTDB_ATTR_LOADING;
6458
6459         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6460
6461         *addp = loading_addrdataset;
6462         *dbloadp = loadctx;
6463
6464         return (ISC_R_SUCCESS);
6465 }
6466
6467 static isc_result_t
6468 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
6469         rbtdb_load_t *loadctx;
6470         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6471
6472         REQUIRE(VALID_RBTDB(rbtdb));
6473         REQUIRE(dbloadp != NULL);
6474         loadctx = *dbloadp;
6475         REQUIRE(loadctx->rbtdb == rbtdb);
6476
6477         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6478
6479         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
6480         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
6481
6482         rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
6483         rbtdb->attributes |= RBTDB_ATTR_LOADED;
6484
6485         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6486
6487         /*
6488          * If there's a KEY rdataset at the zone origin containing a
6489          * zone key, we consider the zone secure.
6490          */
6491         if (! IS_CACHE(rbtdb))
6492                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6493
6494         *dbloadp = NULL;
6495
6496         isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
6497
6498         return (ISC_R_SUCCESS);
6499 }
6500
6501 static isc_result_t
6502 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
6503      dns_masterformat_t masterformat) {
6504         dns_rbtdb_t *rbtdb;
6505
6506         rbtdb = (dns_rbtdb_t *)db;
6507
6508         REQUIRE(VALID_RBTDB(rbtdb));
6509
6510         return (dns_master_dump2(rbtdb->common.mctx, db, version,
6511                                  &dns_master_style_default,
6512                                  filename, masterformat));
6513 }
6514
6515 static void
6516 delete_callback(void *data, void *arg) {
6517         dns_rbtdb_t *rbtdb = arg;
6518         rdatasetheader_t *current, *next;
6519         unsigned int locknum;
6520
6521         current = data;
6522         locknum = current->node->locknum;
6523         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6524         while (current != NULL) {
6525                 next = current->next;
6526                 free_rdataset(rbtdb, rbtdb->common.mctx, current);
6527                 current = next;
6528         }
6529         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6530 }
6531
6532 static isc_boolean_t
6533 issecure(dns_db_t *db) {
6534         dns_rbtdb_t *rbtdb;
6535         isc_boolean_t secure;
6536
6537         rbtdb = (dns_rbtdb_t *)db;
6538
6539         REQUIRE(VALID_RBTDB(rbtdb));
6540
6541         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6542         secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
6543         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6544
6545         return (secure);
6546 }
6547
6548 static isc_boolean_t
6549 isdnssec(dns_db_t *db) {
6550         dns_rbtdb_t *rbtdb;
6551         isc_boolean_t dnssec;
6552
6553         rbtdb = (dns_rbtdb_t *)db;
6554
6555         REQUIRE(VALID_RBTDB(rbtdb));
6556
6557         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6558         dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
6559         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6560
6561         return (dnssec);
6562 }
6563
6564 static unsigned int
6565 nodecount(dns_db_t *db) {
6566         dns_rbtdb_t *rbtdb;
6567         unsigned int count;
6568
6569         rbtdb = (dns_rbtdb_t *)db;
6570
6571         REQUIRE(VALID_RBTDB(rbtdb));
6572
6573         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6574         count = dns_rbt_nodecount(rbtdb->tree);
6575         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6576
6577         return (count);
6578 }
6579
6580 static void
6581 settask(dns_db_t *db, isc_task_t *task) {
6582         dns_rbtdb_t *rbtdb;
6583
6584         rbtdb = (dns_rbtdb_t *)db;
6585
6586         REQUIRE(VALID_RBTDB(rbtdb));
6587
6588         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6589         if (rbtdb->task != NULL)
6590                 isc_task_detach(&rbtdb->task);
6591         if (task != NULL)
6592                 isc_task_attach(task, &rbtdb->task);
6593         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6594 }
6595
6596 static isc_boolean_t
6597 ispersistent(dns_db_t *db) {
6598         UNUSED(db);
6599         return (ISC_FALSE);
6600 }
6601
6602 static isc_result_t
6603 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
6604         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6605         dns_rbtnode_t *onode;
6606         isc_result_t result = ISC_R_SUCCESS;
6607
6608         REQUIRE(VALID_RBTDB(rbtdb));
6609         REQUIRE(nodep != NULL && *nodep == NULL);
6610
6611         /* Note that the access to origin_node doesn't require a DB lock */
6612         onode = (dns_rbtnode_t *)rbtdb->origin_node;
6613         if (onode != NULL) {
6614                 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
6615                 new_reference(rbtdb, onode);
6616                 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
6617
6618                 *nodep = rbtdb->origin_node;
6619         } else {
6620                 INSIST(IS_CACHE(rbtdb));
6621                 result = ISC_R_NOTFOUND;
6622         }
6623
6624         return (result);
6625 }
6626
6627 static isc_result_t
6628 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
6629                    isc_uint8_t *flags, isc_uint16_t *iterations,
6630                    unsigned char *salt, size_t *salt_length)
6631 {
6632         dns_rbtdb_t *rbtdb;
6633         isc_result_t result = ISC_R_NOTFOUND;
6634         rbtdb_version_t *rbtversion = version;
6635
6636         rbtdb = (dns_rbtdb_t *)db;
6637
6638         REQUIRE(VALID_RBTDB(rbtdb));
6639
6640         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6641
6642         if (rbtversion == NULL)
6643                 rbtversion = rbtdb->current_version;
6644
6645         if (rbtversion->havensec3) {
6646                 if (hash != NULL)
6647                         *hash = rbtversion->hash;
6648                 if (salt != NULL && salt_length != NULL) {
6649                         REQUIRE(*salt_length >= rbtversion->salt_length);
6650                         memcpy(salt, rbtversion->salt, rbtversion->salt_length);
6651                 }
6652                 if (salt_length != NULL)
6653                         *salt_length = rbtversion->salt_length;
6654                 if (iterations != NULL)
6655                         *iterations = rbtversion->iterations;
6656                 if (flags != NULL)
6657                         *flags = rbtversion->flags;
6658                 result = ISC_R_SUCCESS;
6659         }
6660         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6661
6662         return (result);
6663 }
6664
6665 static isc_result_t
6666 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
6667         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6668         isc_stdtime_t oldresign;
6669         isc_result_t result = ISC_R_SUCCESS;
6670         rdatasetheader_t *header;
6671
6672         REQUIRE(VALID_RBTDB(rbtdb));
6673         REQUIRE(!IS_CACHE(rbtdb));
6674         REQUIRE(rdataset != NULL);
6675
6676         header = rdataset->private3;
6677         header--;
6678
6679         NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6680                   isc_rwlocktype_write);
6681
6682         oldresign = header->resign;
6683         header->resign = resign;
6684         if (header->heap_index != 0) {
6685                 INSIST(RESIGN(header));
6686                 if (resign == 0) {
6687                         isc_heap_delete(rbtdb->heaps[header->node->locknum],
6688                                         header->heap_index);
6689                         header->heap_index = 0;
6690                 } else if (resign < oldresign)
6691                         isc_heap_increased(rbtdb->heaps[header->node->locknum],
6692                                            header->heap_index);
6693                 else
6694                         isc_heap_decreased(rbtdb->heaps[header->node->locknum],
6695                                            header->heap_index);
6696         } else if (resign && header->heap_index == 0) {
6697                 header->attributes |= RDATASET_ATTR_RESIGN;
6698                 result = resign_insert(rbtdb, header->node->locknum, header);
6699         }
6700         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6701                     isc_rwlocktype_write);
6702         return (result);
6703 }
6704
6705 static isc_result_t
6706 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
6707                dns_name_t *foundname)
6708 {
6709         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6710         rdatasetheader_t *header = NULL, *this;
6711         unsigned int i;
6712         isc_result_t result = ISC_R_NOTFOUND;
6713         unsigned int locknum;
6714
6715         REQUIRE(VALID_RBTDB(rbtdb));
6716
6717         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
6718
6719         for (i = 0; i < rbtdb->node_lock_count; i++) {
6720                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
6721                 this = isc_heap_element(rbtdb->heaps[i], 1);
6722                 if (this == NULL) {
6723                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6724                                     isc_rwlocktype_read);
6725                         continue;
6726                 }
6727                 if (header == NULL)
6728                         header = this;
6729                 else if (isc_serial_lt(this->resign, header->resign)) {
6730                         locknum = header->node->locknum;
6731                         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
6732                                     isc_rwlocktype_read);
6733                         header = this;
6734                 } else
6735                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6736                                     isc_rwlocktype_read);
6737         }
6738
6739         if (header == NULL)
6740                 goto unlock;
6741
6742         bind_rdataset(rbtdb, header->node, header, 0, rdataset);
6743
6744         if (foundname != NULL)
6745                 dns_rbt_fullnamefromnode(header->node, foundname);
6746
6747         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6748                     isc_rwlocktype_read);
6749
6750         result = ISC_R_SUCCESS;
6751
6752  unlock:
6753         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
6754
6755         return (result);
6756 }
6757
6758 static void
6759 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
6760 {
6761         rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
6762         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6763         dns_rbtnode_t *node;
6764         rdatasetheader_t *header;
6765
6766         REQUIRE(VALID_RBTDB(rbtdb));
6767         REQUIRE(rdataset != NULL);
6768         REQUIRE(rbtdb->future_version == rbtversion);
6769         REQUIRE(rbtversion->writer);
6770
6771         node = rdataset->private2;
6772         header = rdataset->private3;
6773         header--;
6774
6775         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6776         NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
6777                   isc_rwlocktype_write);
6778         /*
6779          * Delete from heap and save to re-signed list so that it can
6780          * be restored if we backout of this change.
6781          */
6782         new_reference(rbtdb, node);
6783         isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
6784         header->heap_index = 0;
6785         ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
6786
6787         NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
6788                     isc_rwlocktype_write);
6789         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6790 }
6791
6792 static dns_stats_t *
6793 getrrsetstats(dns_db_t *db) {
6794         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6795
6796         REQUIRE(VALID_RBTDB(rbtdb));
6797         REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
6798
6799         return (rbtdb->rrsetstats);
6800 }
6801
/*
 * Method table installed by the create routine for every database type
 * other than a cache (i.e. zone and stub databases).
 *
 * This is a positional initializer: each entry fills the next member of
 * dns_dbmethods_t in declaration order, so entries must not be reordered
 * and a NULL entry leaves that method unimplemented.  Compared with
 * cache_methods, this table uses the zone_* lookup routines, supplies the
 * NSEC3 and re-signing methods, and leaves the final slot (where
 * cache_methods has getrrsetstats) NULL.
 *
 * NOTE(review): the member names behind the NULL slots are not visible in
 * this file; confirm against the dns_dbmethods_t declaration before
 * editing.
 */
6802 static dns_dbmethods_t zone_methods = {
6803         attach,
6804         detach,
6805         beginload,
6806         endload,
6807         dump,
6808         currentversion,
6809         newversion,
6810         attachversion,
6811         closeversion,
6812         findnode,
6813         zone_find,
6814         zone_findzonecut,
6815         attachnode,
6816         detachnode,
6817         expirenode,
6818         printnode,
6819         createiterator,
6820         zone_findrdataset,
6821         allrdatasets,
6822         addrdataset,
6823         subtractrdataset,
6824         deleterdataset,
6825         issecure,
6826         nodecount,
6827         ispersistent,
6828         overmem,
6829         settask,
6830         getoriginnode,
6831         NULL,
6832         getnsec3parameters,
6833         findnsec3node,
6834         setsigningtime,
6835         getsigningtime,
6836         resigned,
6837         isdnssec,
6838         NULL
6839 };
6840
/*
 * Method table installed by the create routine for cache databases.
 *
 * This is a positional initializer: each entry fills the next member of
 * dns_dbmethods_t in declaration order, so entries must not be reordered
 * and a NULL entry leaves that method unimplemented.  Compared with
 * zone_methods, this table uses the cache_* lookup routines, leaves the
 * NSEC3 and re-signing slots NULL, and supplies getrrsetstats in the
 * final slot.
 *
 * NOTE(review): the member names behind the NULL slots are not visible in
 * this file; confirm against the dns_dbmethods_t declaration before
 * editing.
 */
6841 static dns_dbmethods_t cache_methods = {
6842         attach,
6843         detach,
6844         beginload,
6845         endload,
6846         dump,
6847         currentversion,
6848         newversion,
6849         attachversion,
6850         closeversion,
6851         findnode,
6852         cache_find,
6853         cache_findzonecut,
6854         attachnode,
6855         detachnode,
6856         expirenode,
6857         printnode,
6858         createiterator,
6859         cache_findrdataset,
6860         allrdatasets,
6861         addrdataset,
6862         subtractrdataset,
6863         deleterdataset,
6864         issecure,
6865         nodecount,
6866         ispersistent,
6867         overmem,
6868         settask,
6869         getoriginnode,
6870         NULL,
6871         NULL,
6872         NULL,
6873         NULL,
6874         NULL,
6875         NULL,
6876         isdnssec,
6877         getrrsetstats
6878 };
6879
6880 isc_result_t
6881 #ifdef DNS_RBTDB_VERSION64
6882 dns_rbtdb64_create
6883 #else
6884 dns_rbtdb_create
6885 #endif
6886                 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
6887                  dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
6888                  void *driverarg, dns_db_t **dbp)
6889 {
6890         dns_rbtdb_t *rbtdb;
6891         isc_result_t result;
6892         int i;
6893         dns_name_t name;
6894         isc_boolean_t (*sooner)(void *, void *);
6895
6896         /* Keep the compiler happy. */
6897         UNUSED(argc);
6898         UNUSED(argv);
6899         UNUSED(driverarg);
6900
6901         rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
6902         if (rbtdb == NULL)
6903                 return (ISC_R_NOMEMORY);
6904
6905         memset(rbtdb, '\0', sizeof(*rbtdb));
6906         dns_name_init(&rbtdb->common.origin, NULL);
6907         rbtdb->common.attributes = 0;
6908         if (type == dns_dbtype_cache) {
6909                 rbtdb->common.methods = &cache_methods;
6910                 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
6911         } else if (type == dns_dbtype_stub) {
6912                 rbtdb->common.methods = &zone_methods;
6913                 rbtdb->common.attributes |= DNS_DBATTR_STUB;
6914         } else
6915                 rbtdb->common.methods = &zone_methods;
6916         rbtdb->common.rdclass = rdclass;
6917         rbtdb->common.mctx = NULL;
6918
6919         result = RBTDB_INITLOCK(&rbtdb->lock);
6920         if (result != ISC_R_SUCCESS)
6921                 goto cleanup_rbtdb;
6922
6923         result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
6924         if (result != ISC_R_SUCCESS)
6925                 goto cleanup_lock;
6926
6927         /*
6928          * Initialize node_lock_count in a generic way to support future
6929          * extension which allows the user to specify this value on creation.
6930          * Note that when specified for a cache DB it must be larger than 1
6931          * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
6932          */
6933         if (rbtdb->node_lock_count == 0) {
6934                 if (IS_CACHE(rbtdb))
6935                         rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
6936                 else
6937                         rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
6938         } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
6939                 result = ISC_R_RANGE;
6940                 goto cleanup_tree_lock;
6941         }
6942         INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
6943         rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
6944                                         sizeof(rbtdb_nodelock_t));
6945         if (rbtdb->node_locks == NULL) {
6946                 result = ISC_R_NOMEMORY;
6947                 goto cleanup_tree_lock;
6948         }
6949
6950         rbtdb->rrsetstats = NULL;
6951         if (IS_CACHE(rbtdb)) {
6952                 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
6953                 if (result != ISC_R_SUCCESS)
6954                         goto cleanup_node_locks;
6955                 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
6956                                                sizeof(rdatasetheaderlist_t));
6957                 if (rbtdb->rdatasets == NULL) {
6958                         result = ISC_R_NOMEMORY;
6959                         goto cleanup_rrsetstats;
6960                 }
6961                 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6962                         ISC_LIST_INIT(rbtdb->rdatasets[i]);
6963         } else
6964                 rbtdb->rdatasets = NULL;
6965
6966         /*
6967          * Create the heaps.
6968          */
6969         rbtdb->heaps = isc_mem_get(mctx, rbtdb->node_lock_count *
6970                                    sizeof(isc_heap_t *));
6971         if (rbtdb->heaps == NULL) {
6972                 result = ISC_R_NOMEMORY;
6973                 goto cleanup_rdatasets;
6974         }
6975         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6976                 rbtdb->heaps[i] = NULL;
6977         sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
6978         for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
6979                 result = isc_heap_create(mctx, sooner, set_index, 0,
6980                                          &rbtdb->heaps[i]);
6981                 if (result != ISC_R_SUCCESS)
6982                         goto cleanup_heaps;
6983         }
6984
6985         /*
6986          * Create deadnode lists.
6987          */
6988         rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
6989                                        sizeof(rbtnodelist_t));
6990         if (rbtdb->deadnodes == NULL) {
6991                 result = ISC_R_NOMEMORY;
6992                 goto cleanup_heaps;
6993         }
6994         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6995                 ISC_LIST_INIT(rbtdb->deadnodes[i]);
6996
6997         rbtdb->active = rbtdb->node_lock_count;
6998
6999         for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
7000                 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
7001                 if (result == ISC_R_SUCCESS) {
7002                         result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
7003                         if (result != ISC_R_SUCCESS)
7004                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7005                 }
7006                 if (result != ISC_R_SUCCESS) {
7007                         while (i-- > 0) {
7008                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7009                                 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7010                                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7011                         }
7012                         goto cleanup_deadnodes;
7013                 }
7014                 rbtdb->node_locks[i].exiting = ISC_FALSE;
7015         }
7016
7017         /*
7018          * Attach to the mctx.  The database will persist so long as there
7019          * are references to it, and attaching to the mctx ensures that our
7020          * mctx won't disappear out from under us.
7021          */
7022         isc_mem_attach(mctx, &rbtdb->common.mctx);
7023
7024         /*
7025          * Must be initialized before free_rbtdb() is called.
7026          */
7027         isc_ondestroy_init(&rbtdb->common.ondest);
7028
7029         /*
7030          * Make a copy of the origin name.
7031          */
7032         result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7033         if (result != ISC_R_SUCCESS) {
7034                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7035                 return (result);
7036         }
7037
7038         /*
7039          * Make the Red-Black Trees.
7040          */
7041         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7042         if (result != ISC_R_SUCCESS) {
7043                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7044                 return (result);
7045         }
7046
7047         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7048         if (result != ISC_R_SUCCESS) {
7049                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7050                 return (result);
7051         }
7052
7053         /*
7054          * In order to set the node callback bit correctly in zone databases,
7055          * we need to know if the node has the origin name of the zone.
7056          * In loading_addrdataset() we could simply compare the new name
7057          * to the origin name, but this is expensive.  Also, we don't know the
7058          * node name in addrdataset(), so we need another way of knowing the
7059          * zone's top.
7060          *
7061          * We now explicitly create a node for the zone's origin, and then
7062          * we simply remember the node's address.  This is safe, because
7063          * the top-of-zone node can never be deleted, nor can its address
7064          * change.
7065          */
7066         if (!IS_CACHE(rbtdb)) {
7067                 rbtdb->origin_node = NULL;
7068                 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7069                                          &rbtdb->origin_node);
7070                 if (result != ISC_R_SUCCESS) {
7071                         INSIST(result != ISC_R_EXISTS);
7072                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7073                         return (result);
7074                 }
7075                 rbtdb->origin_node->nsec3 = 0;
7076                 /*
7077                  * We need to give the origin node the right locknum.
7078                  */
7079                 dns_name_init(&name, NULL);
7080                 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7081 #ifdef DNS_RBT_USEHASH
7082                 rbtdb->origin_node->locknum =
7083                         rbtdb->origin_node->hashval %
7084                         rbtdb->node_lock_count;
7085 #else
7086                 rbtdb->origin_node->locknum =
7087                         dns_name_hash(&name, ISC_TRUE) %
7088                         rbtdb->node_lock_count;
7089 #endif
7090         }
7091
7092         /*
7093          * Misc. Initialization.
7094          */
7095         result = isc_refcount_init(&rbtdb->references, 1);
7096         if (result != ISC_R_SUCCESS) {
7097                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7098                 return (result);
7099         }
7100         rbtdb->attributes = 0;
7101         rbtdb->overmem = ISC_FALSE;
7102         rbtdb->task = NULL;
7103
7104         /*
7105          * Version Initialization.
7106          */
7107         rbtdb->current_serial = 1;
7108         rbtdb->least_serial = 1;
7109         rbtdb->next_serial = 2;
7110         rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7111         if (rbtdb->current_version == NULL) {
7112                 isc_refcount_decrement(&rbtdb->references, NULL);
7113                 isc_refcount_destroy(&rbtdb->references);
7114                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7115                 return (ISC_R_NOMEMORY);
7116         }
7117         rbtdb->current_version->secure = dns_db_insecure;
7118         rbtdb->current_version->havensec3 = ISC_FALSE;
7119         rbtdb->current_version->flags = 0;
7120         rbtdb->current_version->iterations = 0;
7121         rbtdb->current_version->hash = 0;
7122         rbtdb->current_version->salt_length = 0;
7123         memset(rbtdb->current_version->salt, 0,
7124                sizeof(rbtdb->current_version->salt));
7125         rbtdb->future_version = NULL;
7126         ISC_LIST_INIT(rbtdb->open_versions);
7127         /*
7128          * Keep the current version in the open list so that list operation
7129          * won't happen in normal lookup operations.
7130          */
7131         PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7132
7133         rbtdb->common.magic = DNS_DB_MAGIC;
7134         rbtdb->common.impmagic = RBTDB_MAGIC;
7135
7136         *dbp = (dns_db_t *)rbtdb;
7137
7138         return (ISC_R_SUCCESS);
7139
7140  cleanup_deadnodes:
7141         isc_mem_put(mctx, rbtdb->deadnodes,
7142                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7143
7144  cleanup_heaps:
7145         if (rbtdb->heaps != NULL) {
7146                 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7147                         if (rbtdb->heaps[i] != NULL)
7148                                 isc_heap_destroy(&rbtdb->heaps[i]);
7149                 isc_mem_put(mctx, rbtdb->heaps,
7150                             rbtdb->node_lock_count * sizeof(isc_heap_t *));
7151         }
7152
7153  cleanup_rdatasets:
7154         if (rbtdb->rdatasets != NULL)
7155                 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7156                             sizeof(rdatasetheaderlist_t));
7157  cleanup_rrsetstats:
7158         if (rbtdb->rrsetstats != NULL)
7159                 dns_stats_detach(&rbtdb->rrsetstats);
7160
7161  cleanup_node_locks:
7162         isc_mem_put(mctx, rbtdb->node_locks,
7163                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7164
7165  cleanup_tree_lock:
7166         isc_rwlock_destroy(&rbtdb->tree_lock);
7167
7168  cleanup_lock:
7169         RBTDB_DESTROYLOCK(&rbtdb->lock);
7170
7171  cleanup_rbtdb:
7172         isc_mem_put(mctx, rbtdb,  sizeof(*rbtdb));
7173         return (result);
7174 }
7175
7176
7177 /*
7178  * Slabbed Rdataset Methods
7179  */
7180
7181 static void
7182 rdataset_disassociate(dns_rdataset_t *rdataset) {
7183         dns_db_t *db = rdataset->private1;
7184         dns_dbnode_t *node = rdataset->private2;
7185
7186         detachnode(db, &node);
7187 }
7188
7189 static isc_result_t
7190 rdataset_first(dns_rdataset_t *rdataset) {
7191         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7192         unsigned int count;
7193
7194         count = raw[0] * 256 + raw[1];
7195         if (count == 0) {
7196                 rdataset->private5 = NULL;
7197                 return (ISC_R_NOMORE);
7198         }
7199
7200 #if DNS_RDATASET_FIXED
7201         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7202                 raw += 2 + (4 * count);
7203         else
7204 #endif
7205                 raw += 2;
7206
7207         /*
7208          * The privateuint4 field is the number of rdata beyond the
7209          * cursor position, so we decrement the total count by one
7210          * before storing it.
7211          *
7212          * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7213          * first record.  If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7214          * to the first entry in the offset table.
7215          */
7216         count--;
7217         rdataset->privateuint4 = count;
7218         rdataset->private5 = raw;
7219
7220         return (ISC_R_SUCCESS);
7221 }
7222
7223 static isc_result_t
7224 rdataset_next(dns_rdataset_t *rdataset) {
7225         unsigned int count;
7226         unsigned int length;
7227         unsigned char *raw;     /* RDATASLAB */
7228
7229         count = rdataset->privateuint4;
7230         if (count == 0)
7231                 return (ISC_R_NOMORE);
7232         count--;
7233         rdataset->privateuint4 = count;
7234
7235         /*
7236          * Skip forward one record (length + 4) or one offset (4).
7237          */
7238         raw = rdataset->private5;
7239 #if DNS_RDATASET_FIXED
7240         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7241 #endif
7242                 length = raw[0] * 256 + raw[1];
7243                 raw += length;
7244 #if DNS_RDATASET_FIXED
7245         }
7246         rdataset->private5 = raw + 4;           /* length(2) + order(2) */
7247 #else
7248         rdataset->private5 = raw + 2;           /* length(2) */
7249 #endif
7250
7251         return (ISC_R_SUCCESS);
7252 }
7253
7254 static void
7255 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7256         unsigned char *raw = rdataset->private5;        /* RDATASLAB */
7257 #if DNS_RDATASET_FIXED
7258         unsigned int offset;
7259 #endif
7260         unsigned int length;
7261         isc_region_t r;
7262         unsigned int flags = 0;
7263
7264         REQUIRE(raw != NULL);
7265
7266         /*
7267          * Find the start of the record if not already in private5
7268          * then skip the length and order fields.
7269          */
7270 #if DNS_RDATASET_FIXED
7271         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7272                 offset = (raw[0] << 24) + (raw[1] << 16) +
7273                          (raw[2] << 8) + raw[3];
7274                 raw = rdataset->private3;
7275                 raw += offset;
7276         }
7277 #endif
7278         length = raw[0] * 256 + raw[1];
7279 #if DNS_RDATASET_FIXED
7280         raw += 4;
7281 #else
7282         raw += 2;
7283 #endif
7284         if (rdataset->type == dns_rdatatype_rrsig) {
7285                 if (*raw & DNS_RDATASLAB_OFFLINE)
7286                         flags |= DNS_RDATA_OFFLINE;
7287                 length--;
7288                 raw++;
7289         }
7290         r.length = length;
7291         r.base = raw;
7292         dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7293         rdata->flags |= flags;
7294 }
7295
7296 static void
7297 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7298         dns_db_t *db = source->private1;
7299         dns_dbnode_t *node = source->private2;
7300         dns_dbnode_t *cloned_node = NULL;
7301
7302         attachnode(db, node, &cloned_node);
7303         *target = *source;
7304
7305         /*
7306          * Reset iterator state.
7307          */
7308         target->privateuint4 = 0;
7309         target->private5 = NULL;
7310 }
7311
7312 static unsigned int
7313 rdataset_count(dns_rdataset_t *rdataset) {
7314         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7315         unsigned int count;
7316
7317         count = raw[0] * 256 + raw[1];
7318
7319         return (count);
7320 }
7321
7322 static isc_result_t
7323 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7324                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7325 {
7326         dns_db_t *db = rdataset->private1;
7327         dns_dbnode_t *node = rdataset->private2;
7328         dns_dbnode_t *cloned_node;
7329         struct noqname *noqname = rdataset->private6;
7330
7331         cloned_node = NULL;
7332         attachnode(db, node, &cloned_node);
7333         nsec->methods = &rdataset_methods;
7334         nsec->rdclass = db->rdclass;
7335         nsec->type = noqname->type;
7336         nsec->covers = 0;
7337         nsec->ttl = rdataset->ttl;
7338         nsec->trust = rdataset->trust;
7339         nsec->private1 = rdataset->private1;
7340         nsec->private2 = rdataset->private2;
7341         nsec->private3 = noqname->neg;
7342         nsec->privateuint4 = 0;
7343         nsec->private5 = NULL;
7344         nsec->private6 = NULL;
7345         nsec->private7 = NULL;
7346
7347         cloned_node = NULL;
7348         attachnode(db, node, &cloned_node);
7349         nsecsig->methods = &rdataset_methods;
7350         nsecsig->rdclass = db->rdclass;
7351         nsecsig->type = dns_rdatatype_rrsig;
7352         nsecsig->covers = noqname->type;
7353         nsecsig->ttl = rdataset->ttl;
7354         nsecsig->trust = rdataset->trust;
7355         nsecsig->private1 = rdataset->private1;
7356         nsecsig->private2 = rdataset->private2;
7357         nsecsig->private3 = noqname->negsig;
7358         nsecsig->privateuint4 = 0;
7359         nsecsig->private5 = NULL;
7360         nsec->private6 = NULL;
7361         nsec->private7 = NULL;
7362
7363         dns_name_clone(&noqname->name, name);
7364
7365         return (ISC_R_SUCCESS);
7366 }
7367
7368 static isc_result_t
7369 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
7370                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7371 {
7372         dns_db_t *db = rdataset->private1;
7373         dns_dbnode_t *node = rdataset->private2;
7374         dns_dbnode_t *cloned_node;
7375         struct noqname *closest = rdataset->private7;
7376
7377         cloned_node = NULL;
7378         attachnode(db, node, &cloned_node);
7379         nsec->methods = &rdataset_methods;
7380         nsec->rdclass = db->rdclass;
7381         nsec->type = closest->type;
7382         nsec->covers = 0;
7383         nsec->ttl = rdataset->ttl;
7384         nsec->trust = rdataset->trust;
7385         nsec->private1 = rdataset->private1;
7386         nsec->private2 = rdataset->private2;
7387         nsec->private3 = closest->neg;
7388         nsec->privateuint4 = 0;
7389         nsec->private5 = NULL;
7390         nsec->private6 = NULL;
7391         nsec->private7 = NULL;
7392
7393         cloned_node = NULL;
7394         attachnode(db, node, &cloned_node);
7395         nsecsig->methods = &rdataset_methods;
7396         nsecsig->rdclass = db->rdclass;
7397         nsecsig->type = dns_rdatatype_rrsig;
7398         nsecsig->covers = closest->type;
7399         nsecsig->ttl = rdataset->ttl;
7400         nsecsig->trust = rdataset->trust;
7401         nsecsig->private1 = rdataset->private1;
7402         nsecsig->private2 = rdataset->private2;
7403         nsecsig->private3 = closest->negsig;
7404         nsecsig->privateuint4 = 0;
7405         nsecsig->private5 = NULL;
7406         nsec->private6 = NULL;
7407         nsec->private7 = NULL;
7408
7409         dns_name_clone(&closest->name, name);
7410
7411         return (ISC_R_SUCCESS);
7412 }
7413
7414 static void
7415 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
7416         dns_rbtdb_t *rbtdb = rdataset->private1;
7417         dns_rbtnode_t *rbtnode = rdataset->private2;
7418         rdatasetheader_t *header = rdataset->private3;
7419
7420         header--;
7421         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7422                   isc_rwlocktype_write);
7423         header->trust = rdataset->trust = trust;
7424         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7425                   isc_rwlocktype_write);
7426 }
7427
7428 static void
7429 rdataset_expire(dns_rdataset_t *rdataset) {
7430         dns_rbtdb_t *rbtdb = rdataset->private1;
7431         dns_rbtnode_t *rbtnode = rdataset->private2;
7432         rdatasetheader_t *header = rdataset->private3;
7433
7434         header--;
7435         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7436                   isc_rwlocktype_write);
7437         expire_header(rbtdb, header, ISC_FALSE);
7438         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7439                   isc_rwlocktype_write);
7440 }
7441
7442 /*
7443  * Rdataset Iterator Methods
7444  */
7445
7446 static void
7447 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
7448         rbtdb_rdatasetiter_t *rbtiterator;
7449
7450         rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
7451
7452         if (rbtiterator->common.version != NULL)
7453                 closeversion(rbtiterator->common.db,
7454                              &rbtiterator->common.version, ISC_FALSE);
7455         detachnode(rbtiterator->common.db, &rbtiterator->common.node);
7456         isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
7457                     sizeof(*rbtiterator));
7458
7459         *iteratorp = NULL;
7460 }
7461
7462 static isc_result_t
7463 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
7464         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7465         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7466         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7467         rbtdb_version_t *rbtversion = rbtiterator->common.version;
7468         rdatasetheader_t *header, *top_next;
7469         rbtdb_serial_t serial;
7470         isc_stdtime_t now;
7471
7472         if (IS_CACHE(rbtdb)) {
7473                 serial = 1;
7474                 now = rbtiterator->common.now;
7475         } else {
7476                 serial = rbtversion->serial;
7477                 now = 0;
7478         }
7479
7480         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7481                   isc_rwlocktype_read);
7482
7483         for (header = rbtnode->data; header != NULL; header = top_next) {
7484                 top_next = header->next;
7485                 do {
7486                         if (header->serial <= serial && !IGNORE(header)) {
7487                                 /*
7488                                  * Is this a "this rdataset doesn't exist"
7489                                  * record?  Or is it too old in the cache?
7490                                  *
7491                                  * Note: unlike everywhere else, we
7492                                  * check for now > header->rdh_ttl instead
7493                                  * of now >= header->rdh_ttl.  This allows
7494                                  * ANY and RRSIG queries for 0 TTL
7495                                  * rdatasets to work.
7496                                  */
7497                                 if (NONEXISTENT(header) ||
7498                                     (now != 0 && now > header->rdh_ttl))
7499                                         header = NULL;
7500                                 break;
7501                         } else
7502                                 header = header->down;
7503                 } while (header != NULL);
7504                 if (header != NULL)
7505                         break;
7506         }
7507
7508         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7509                     isc_rwlocktype_read);
7510
7511         rbtiterator->current = header;
7512
7513         if (header == NULL)
7514                 return (ISC_R_NOMORE);
7515
7516         return (ISC_R_SUCCESS);
7517 }
7518
7519 static isc_result_t
7520 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
7521         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7522         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7523         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7524         rbtdb_version_t *rbtversion = rbtiterator->common.version;
7525         rdatasetheader_t *header, *top_next;
7526         rbtdb_serial_t serial;
7527         isc_stdtime_t now;
7528         rbtdb_rdatatype_t type, negtype;
7529         dns_rdatatype_t rdtype, covers;
7530
7531         header = rbtiterator->current;
7532         if (header == NULL)
7533                 return (ISC_R_NOMORE);
7534
7535         if (IS_CACHE(rbtdb)) {
7536                 serial = 1;
7537                 now = rbtiterator->common.now;
7538         } else {
7539                 serial = rbtversion->serial;
7540                 now = 0;
7541         }
7542
7543         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7544                   isc_rwlocktype_read);
7545
7546         type = header->type;
7547         rdtype = RBTDB_RDATATYPE_BASE(header->type);
7548         if (rdtype == 0) {
7549                 covers = RBTDB_RDATATYPE_EXT(header->type);
7550                 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
7551         } else
7552                 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
7553         for (header = header->next; header != NULL; header = top_next) {
7554                 top_next = header->next;
7555                 /*
7556                  * If not walking back up the down list.
7557                  */
7558                 if (header->type != type && header->type != negtype) {
7559                         do {
7560                                 if (header->serial <= serial &&
7561                                     !IGNORE(header)) {
7562                                         /*
7563                                          * Is this a "this rdataset doesn't
7564                                          * exist" record?
7565                                          *
7566                                          * Note: unlike everywhere else, we
7567                                          * check for now > header->ttl instead
7568                                          * of now >= header->ttl.  This allows
7569                                          * ANY and RRSIG queries for 0 TTL
7570                                          * rdatasets to work.
7571                                          */
7572                                         if ((header->attributes &
7573                                              RDATASET_ATTR_NONEXISTENT) != 0 ||
7574                                             (now != 0 && now > header->rdh_ttl))
7575                                                 header = NULL;
7576                                         break;
7577                                 } else
7578                                         header = header->down;
7579                         } while (header != NULL);
7580                         if (header != NULL)
7581                                 break;
7582                 }
7583         }
7584
7585         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7586                     isc_rwlocktype_read);
7587
7588         rbtiterator->current = header;
7589
7590         if (header == NULL)
7591                 return (ISC_R_NOMORE);
7592
7593         return (ISC_R_SUCCESS);
7594 }
7595
7596 static void
7597 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
7598         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7599         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7600         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7601         rdatasetheader_t *header;
7602
7603         header = rbtiterator->current;
7604         REQUIRE(header != NULL);
7605
7606         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7607                   isc_rwlocktype_read);
7608
7609         bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
7610                       rdataset);
7611
7612         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7613                     isc_rwlocktype_read);
7614 }
7615
7616
7617 /*
7618  * Database Iterator Methods
7619  */
7620
7621 static inline void
7622 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7623         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7624         dns_rbtnode_t *node = rbtdbiter->node;
7625
7626         if (node == NULL)
7627                 return;
7628
7629         INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
7630         reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
7631 }
7632
7633 static inline void
7634 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7635         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7636         dns_rbtnode_t *node = rbtdbiter->node;
7637         nodelock_t *lock;
7638
7639         if (node == NULL)
7640                 return;
7641
7642         lock = &rbtdb->node_locks[node->locknum].lock;
7643         NODE_LOCK(lock, isc_rwlocktype_read);
7644         decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
7645                             rbtdbiter->tree_locked, ISC_FALSE);
7646         NODE_UNLOCK(lock, isc_rwlocktype_read);
7647
7648         rbtdbiter->node = NULL;
7649 }
7650
7651 static void
7652 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
7653         dns_rbtnode_t *node;
7654         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7655         isc_boolean_t was_read_locked = ISC_FALSE;
7656         nodelock_t *lock;
7657         int i;
7658
7659         if (rbtdbiter->delete != 0) {
7660                 /*
7661                  * Note that "%d node of %d in tree" can report things like
7662                  * "flush_deletions: 59 nodes of 41 in tree".  This means
7663                  * That some nodes appear on the deletions list more than
7664                  * once.  Only the last occurence will actually be deleted.
7665                  */
7666                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
7667                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
7668                               "flush_deletions: %d nodes of %d in tree",
7669                               rbtdbiter->delete,
7670                               dns_rbt_nodecount(rbtdb->tree));
7671
7672                 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7673                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7674                         was_read_locked = ISC_TRUE;
7675                 }
7676                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7677                 rbtdbiter->tree_locked = isc_rwlocktype_write;
7678
7679                 for (i = 0; i < rbtdbiter->delete; i++) {
7680                         node = rbtdbiter->deletions[i];
7681                         lock = &rbtdb->node_locks[node->locknum].lock;
7682
7683                         NODE_LOCK(lock, isc_rwlocktype_read);
7684                         decrement_reference(rbtdb, node, 0,
7685                                             isc_rwlocktype_read,
7686                                             rbtdbiter->tree_locked, ISC_FALSE);
7687                         NODE_UNLOCK(lock, isc_rwlocktype_read);
7688                 }
7689
7690                 rbtdbiter->delete = 0;
7691
7692                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7693                 if (was_read_locked) {
7694                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7695                         rbtdbiter->tree_locked = isc_rwlocktype_read;
7696
7697                 } else {
7698                         rbtdbiter->tree_locked = isc_rwlocktype_none;
7699                 }
7700         }
7701 }
7702
7703 static inline void
7704 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
7705         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7706
7707         REQUIRE(rbtdbiter->paused);
7708         REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
7709
7710         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7711         rbtdbiter->tree_locked = isc_rwlocktype_read;
7712
7713         rbtdbiter->paused = ISC_FALSE;
7714 }
7715
7716 static void
7717 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
7718         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
7719         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7720         dns_db_t *db = NULL;
7721
7722         if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7723                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7724                 rbtdbiter->tree_locked = isc_rwlocktype_none;
7725         } else
7726                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
7727
7728         dereference_iter_node(rbtdbiter);
7729
7730         flush_deletions(rbtdbiter);
7731
7732         dns_db_attach(rbtdbiter->common.db, &db);
7733         dns_db_detach(&rbtdbiter->common.db);
7734
7735         dns_rbtnodechain_reset(&rbtdbiter->chain);
7736         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7737         isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
7738         dns_db_detach(&db);
7739
7740         *iteratorp = NULL;
7741 }
7742
7743 static isc_result_t
7744 dbiterator_first(dns_dbiterator_t *iterator) {
7745         isc_result_t result;
7746         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7747         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7748         dns_name_t *name, *origin;
7749
7750         if (rbtdbiter->result != ISC_R_SUCCESS &&
7751             rbtdbiter->result != ISC_R_NOMORE)
7752                 return (rbtdbiter->result);
7753
7754         if (rbtdbiter->paused)
7755                 resume_iteration(rbtdbiter);
7756
7757         dereference_iter_node(rbtdbiter);
7758
7759         name = dns_fixedname_name(&rbtdbiter->name);
7760         origin = dns_fixedname_name(&rbtdbiter->origin);
7761         dns_rbtnodechain_reset(&rbtdbiter->chain);
7762         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7763
7764         if (rbtdbiter->nsec3only) {
7765                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7766                 result = dns_rbtnodechain_first(rbtdbiter->current,
7767                                                 rbtdb->nsec3, name, origin);
7768         } else {
7769                 rbtdbiter->current = &rbtdbiter->chain;
7770                 result = dns_rbtnodechain_first(rbtdbiter->current,
7771                                                 rbtdb->tree, name, origin);
7772                 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
7773                         rbtdbiter->current = &rbtdbiter->nsec3chain;
7774                         result = dns_rbtnodechain_first(rbtdbiter->current,
7775                                                         rbtdb->nsec3, name,
7776                                                         origin);
7777                 }
7778         }
7779         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7780                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7781                                                   NULL, &rbtdbiter->node);
7782                 if (result == ISC_R_SUCCESS) {
7783                         rbtdbiter->new_origin = ISC_TRUE;
7784                         reference_iter_node(rbtdbiter);
7785                 }
7786         } else {
7787                 INSIST(result == ISC_R_NOTFOUND);
7788                 result = ISC_R_NOMORE; /* The tree is empty. */
7789         }
7790
7791         rbtdbiter->result = result;
7792
7793         return (result);
7794 }
7795
7796 static isc_result_t
7797 dbiterator_last(dns_dbiterator_t *iterator) {
7798         isc_result_t result;
7799         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7800         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7801         dns_name_t *name, *origin;
7802
7803         if (rbtdbiter->result != ISC_R_SUCCESS &&
7804             rbtdbiter->result != ISC_R_NOMORE)
7805                 return (rbtdbiter->result);
7806
7807         if (rbtdbiter->paused)
7808                 resume_iteration(rbtdbiter);
7809
7810         dereference_iter_node(rbtdbiter);
7811
7812         name = dns_fixedname_name(&rbtdbiter->name);
7813         origin = dns_fixedname_name(&rbtdbiter->origin);
7814         dns_rbtnodechain_reset(&rbtdbiter->chain);
7815         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7816
7817         result = ISC_R_NOTFOUND;
7818         if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
7819                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7820                 result = dns_rbtnodechain_last(rbtdbiter->current,
7821                                                rbtdb->nsec3, name, origin);
7822         }
7823         if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
7824                 rbtdbiter->current = &rbtdbiter->chain;
7825                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7826                                                name, origin);
7827         }
7828         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7829                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7830                                                   NULL, &rbtdbiter->node);
7831                 if (result == ISC_R_SUCCESS) {
7832                         rbtdbiter->new_origin = ISC_TRUE;
7833                         reference_iter_node(rbtdbiter);
7834                 }
7835         } else {
7836                 INSIST(result == ISC_R_NOTFOUND);
7837                 result = ISC_R_NOMORE; /* The tree is empty. */
7838         }
7839
7840         rbtdbiter->result = result;
7841
7842         return (result);
7843 }
7844
7845 static isc_result_t
7846 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
7847         isc_result_t result;
7848         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7849         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7850         dns_name_t *iname, *origin;
7851
7852         if (rbtdbiter->result != ISC_R_SUCCESS &&
7853             rbtdbiter->result != ISC_R_NOTFOUND &&
7854             rbtdbiter->result != ISC_R_NOMORE)
7855                 return (rbtdbiter->result);
7856
7857         if (rbtdbiter->paused)
7858                 resume_iteration(rbtdbiter);
7859
7860         dereference_iter_node(rbtdbiter);
7861
7862         iname = dns_fixedname_name(&rbtdbiter->name);
7863         origin = dns_fixedname_name(&rbtdbiter->origin);
7864         dns_rbtnodechain_reset(&rbtdbiter->chain);
7865         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7866
7867         if (rbtdbiter->nsec3only) {
7868                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7869                 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7870                                           &rbtdbiter->node,
7871                                           rbtdbiter->current,
7872                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7873         } else if (rbtdbiter->nonsec3) {
7874                 rbtdbiter->current = &rbtdbiter->chain;
7875                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7876                                           &rbtdbiter->node,
7877                                           rbtdbiter->current,
7878                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7879         } else {
7880                 /*
7881                  * Stay on main chain if not found on either chain.
7882                  */
7883                 rbtdbiter->current = &rbtdbiter->chain;
7884                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7885                                           &rbtdbiter->node,
7886                                           rbtdbiter->current,
7887                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7888                 if (result == DNS_R_PARTIALMATCH) {
7889                         dns_rbtnode_t *node = NULL;
7890                         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7891                                                   &node, &rbtdbiter->nsec3chain,
7892                                                   DNS_RBTFIND_EMPTYDATA,
7893                                                   NULL, NULL);
7894                         if (result == ISC_R_SUCCESS) {
7895                                 rbtdbiter->node = node;
7896                                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7897                         }
7898                 }
7899         }
7900
7901 #if 1
7902         if (result == ISC_R_SUCCESS) {
7903                 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
7904                                                   origin, NULL);
7905                 if (result == ISC_R_SUCCESS) {
7906                         rbtdbiter->new_origin = ISC_TRUE;
7907                         reference_iter_node(rbtdbiter);
7908                 }
7909         } else if (result == DNS_R_PARTIALMATCH) {
7910                 result = ISC_R_NOTFOUND;
7911                 rbtdbiter->node = NULL;
7912         }
7913
7914         rbtdbiter->result = result;
7915 #else
7916         if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
7917                 isc_result_t tresult;
7918                 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
7919                                                    origin, NULL);
7920                 if (tresult == ISC_R_SUCCESS) {
7921                         rbtdbiter->new_origin = ISC_TRUE;
7922                         reference_iter_node(rbtdbiter);
7923                 } else {
7924                         result = tresult;
7925                         rbtdbiter->node = NULL;
7926                 }
7927         } else
7928                 rbtdbiter->node = NULL;
7929
7930         rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
7931                             ISC_R_SUCCESS : result;
7932 #endif
7933
7934         return (result);
7935 }
7936
7937 static isc_result_t
7938 dbiterator_prev(dns_dbiterator_t *iterator) {
7939         isc_result_t result;
7940         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7941         dns_name_t *name, *origin;
7942         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7943
7944         REQUIRE(rbtdbiter->node != NULL);
7945
7946         if (rbtdbiter->result != ISC_R_SUCCESS)
7947                 return (rbtdbiter->result);
7948
7949         if (rbtdbiter->paused)
7950                 resume_iteration(rbtdbiter);
7951
7952         name = dns_fixedname_name(&rbtdbiter->name);
7953         origin = dns_fixedname_name(&rbtdbiter->origin);
7954         result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
7955         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
7956             !rbtdbiter->nonsec3 &&
7957             &rbtdbiter->nsec3chain == rbtdbiter->current) {
7958                 rbtdbiter->current = &rbtdbiter->chain;
7959                 dns_rbtnodechain_reset(rbtdbiter->current);
7960                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7961                                                name, origin);
7962                 if (result == ISC_R_NOTFOUND)
7963                         result = ISC_R_NOMORE;
7964         }
7965
7966         dereference_iter_node(rbtdbiter);
7967
7968         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
7969                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
7970                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7971                                                   NULL, &rbtdbiter->node);
7972         }
7973
7974         if (result == ISC_R_SUCCESS)
7975                 reference_iter_node(rbtdbiter);
7976
7977         rbtdbiter->result = result;
7978
7979         return (result);
7980 }
7981
7982 static isc_result_t
7983 dbiterator_next(dns_dbiterator_t *iterator) {
7984         isc_result_t result;
7985         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7986         dns_name_t *name, *origin;
7987         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7988
7989         REQUIRE(rbtdbiter->node != NULL);
7990
7991         if (rbtdbiter->result != ISC_R_SUCCESS)
7992                 return (rbtdbiter->result);
7993
7994         if (rbtdbiter->paused)
7995                 resume_iteration(rbtdbiter);
7996
7997         name = dns_fixedname_name(&rbtdbiter->name);
7998         origin = dns_fixedname_name(&rbtdbiter->origin);
7999         result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
8000         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8001             !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
8002                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8003                 dns_rbtnodechain_reset(rbtdbiter->current);
8004                 result = dns_rbtnodechain_first(rbtdbiter->current,
8005                                                 rbtdb->nsec3, name, origin);
8006                 if (result == ISC_R_NOTFOUND)
8007                         result = ISC_R_NOMORE;
8008         }
8009
8010         dereference_iter_node(rbtdbiter);
8011
8012         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8013                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8014                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8015                                                   NULL, &rbtdbiter->node);
8016         }
8017         if (result == ISC_R_SUCCESS)
8018                 reference_iter_node(rbtdbiter);
8019
8020         rbtdbiter->result = result;
8021
8022         return (result);
8023 }
8024
8025 static isc_result_t
8026 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
8027                    dns_name_t *name)
8028 {
8029         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8030         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8031         dns_rbtnode_t *node = rbtdbiter->node;
8032         isc_result_t result;
8033         dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8034         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8035
8036         REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8037         REQUIRE(rbtdbiter->node != NULL);
8038
8039         if (rbtdbiter->paused)
8040                 resume_iteration(rbtdbiter);
8041
8042         if (name != NULL) {
8043                 if (rbtdbiter->common.relative_names)
8044                         origin = NULL;
8045                 result = dns_name_concatenate(nodename, origin, name, NULL);
8046                 if (result != ISC_R_SUCCESS)
8047                         return (result);
8048                 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8049                         result = DNS_R_NEWORIGIN;
8050         } else
8051                 result = ISC_R_SUCCESS;
8052
8053         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8054         new_reference(rbtdb, node);
8055         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8056
8057         *nodep = rbtdbiter->node;
8058
8059         if (iterator->cleaning && result == ISC_R_SUCCESS) {
8060                 isc_result_t expire_result;
8061
8062                 /*
8063                  * If the deletion array is full, flush it before trying
8064                  * to expire the current node.  The current node can't
8065                  * fully deleted while the iteration cursor is still on it.
8066                  */
8067                 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8068                         flush_deletions(rbtdbiter);
8069
8070                 expire_result = expirenode(iterator->db, *nodep, 0);
8071
8072                 /*
8073                  * expirenode() currently always returns success.
8074                  */
8075                 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8076                         unsigned int refs;
8077
8078                         rbtdbiter->deletions[rbtdbiter->delete++] = node;
8079                         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8080                         dns_rbtnode_refincrement(node, &refs);
8081                         INSIST(refs != 0);
8082                         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8083                 }
8084         }
8085
8086         return (result);
8087 }
8088
8089 static isc_result_t
8090 dbiterator_pause(dns_dbiterator_t *iterator) {
8091         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8092         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8093
8094         if (rbtdbiter->result != ISC_R_SUCCESS &&
8095             rbtdbiter->result != ISC_R_NOMORE)
8096                 return (rbtdbiter->result);
8097
8098         if (rbtdbiter->paused)
8099                 return (ISC_R_SUCCESS);
8100
8101         rbtdbiter->paused = ISC_TRUE;
8102
8103         if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8104                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8105                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8106                 rbtdbiter->tree_locked = isc_rwlocktype_none;
8107         }
8108
8109         flush_deletions(rbtdbiter);
8110
8111         return (ISC_R_SUCCESS);
8112 }
8113
8114 static isc_result_t
8115 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8116         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8117         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8118
8119         if (rbtdbiter->result != ISC_R_SUCCESS)
8120                 return (rbtdbiter->result);
8121
8122         return (dns_name_copy(origin, name, NULL));
8123 }
8124
8125 /*%
8126  * Additional cache routines.
8127  */
8128 static isc_result_t
8129 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8130                        dns_rdatatype_t qtype, dns_acache_t *acache,
8131                        dns_zone_t **zonep, dns_db_t **dbp,
8132                        dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8133                        dns_name_t *fname, dns_message_t *msg,
8134                        isc_stdtime_t now)
8135 {
8136         dns_rbtdb_t *rbtdb = rdataset->private1;
8137         dns_rbtnode_t *rbtnode = rdataset->private2;
8138         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8139         unsigned int current_count = rdataset->privateuint4;
8140         unsigned int count;
8141         rdatasetheader_t *header;
8142         nodelock_t *nodelock;
8143         unsigned int total_count;
8144         acachectl_t *acarray;
8145         dns_acacheentry_t *entry;
8146         isc_result_t result;
8147
8148         UNUSED(qtype); /* we do not use this value at least for now */
8149         UNUSED(acache);
8150
8151         header = (struct rdatasetheader *)(raw - sizeof(*header));
8152
8153         total_count = raw[0] * 256 + raw[1];
8154         INSIST(total_count > current_count);
8155         count = total_count - current_count - 1;
8156
8157         acarray = NULL;
8158
8159         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8160         NODE_LOCK(nodelock, isc_rwlocktype_read);
8161
8162         switch (type) {
8163         case dns_rdatasetadditional_fromauth:
8164                 acarray = header->additional_auth;
8165                 break;
8166         case dns_rdatasetadditional_fromcache:
8167                 acarray = NULL;
8168                 break;
8169         case dns_rdatasetadditional_fromglue:
8170                 acarray = header->additional_glue;
8171                 break;
8172         default:
8173                 INSIST(0);
8174         }
8175
8176         if (acarray == NULL) {
8177                 if (type != dns_rdatasetadditional_fromcache)
8178                         dns_acache_countquerymiss(acache);
8179                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8180                 return (ISC_R_NOTFOUND);
8181         }
8182
8183         if (acarray[count].entry == NULL) {
8184                 dns_acache_countquerymiss(acache);
8185                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8186                 return (ISC_R_NOTFOUND);
8187         }
8188
8189         entry = NULL;
8190         dns_acache_attachentry(acarray[count].entry, &entry);
8191
8192         NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8193
8194         result = dns_acache_getentry(entry, zonep, dbp, versionp,
8195                                      nodep, fname, msg, now);
8196
8197         dns_acache_detachentry(&entry);
8198
8199         return (result);
8200 }
8201
8202 static void
8203 acache_callback(dns_acacheentry_t *entry, void **arg) {
8204         dns_rbtdb_t *rbtdb;
8205         dns_rbtnode_t *rbtnode;
8206         nodelock_t *nodelock;
8207         acachectl_t *acarray = NULL;
8208         acache_cbarg_t *cbarg;
8209         unsigned int count;
8210
8211         REQUIRE(arg != NULL);
8212         cbarg = *arg;
8213
8214         /*
8215          * The caller must hold the entry lock.
8216          */
8217
8218         rbtdb = (dns_rbtdb_t *)cbarg->db;
8219         rbtnode = (dns_rbtnode_t *)cbarg->node;
8220
8221         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8222         NODE_LOCK(nodelock, isc_rwlocktype_write);
8223
8224         switch (cbarg->type) {
8225         case dns_rdatasetadditional_fromauth:
8226                 acarray = cbarg->header->additional_auth;
8227                 break;
8228         case dns_rdatasetadditional_fromglue:
8229                 acarray = cbarg->header->additional_glue;
8230                 break;
8231         default:
8232                 INSIST(0);
8233         }
8234
8235         count = cbarg->count;
8236         if (acarray != NULL && acarray[count].entry == entry) {
8237                 acarray[count].entry = NULL;
8238                 INSIST(acarray[count].cbarg == cbarg);
8239                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8240                 acarray[count].cbarg = NULL;
8241         } else
8242                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8243
8244         dns_acache_detachentry(&entry);
8245
8246         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8247
8248         dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8249         dns_db_detach((dns_db_t **)(void*)&rbtdb);
8250
8251         *arg = NULL;
8252 }
8253
8254 static void
8255 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
8256                       acache_cbarg_t **cbargp)
8257 {
8258         acache_cbarg_t *cbarg;
8259
8260         REQUIRE(mctx != NULL);
8261         REQUIRE(entry != NULL);
8262         REQUIRE(cbargp != NULL && *cbargp != NULL);
8263
8264         cbarg = *cbargp;
8265
8266         dns_acache_cancelentry(entry);
8267         dns_db_detachnode(cbarg->db, &cbarg->node);
8268         dns_db_detach(&cbarg->db);
8269
8270         isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
8271
8272         *cbargp = NULL;
8273 }
8274
8275 static isc_result_t
8276 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8277                        dns_rdatatype_t qtype, dns_acache_t *acache,
8278                        dns_zone_t *zone, dns_db_t *db,
8279                        dns_dbversion_t *version, dns_dbnode_t *node,
8280                        dns_name_t *fname)
8281 {
8282         dns_rbtdb_t *rbtdb = rdataset->private1;
8283         dns_rbtnode_t *rbtnode = rdataset->private2;
8284         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8285         unsigned int current_count = rdataset->privateuint4;
8286         rdatasetheader_t *header;
8287         unsigned int total_count, count;
8288         nodelock_t *nodelock;
8289         isc_result_t result;
8290         acachectl_t *acarray;
8291         dns_acacheentry_t *newentry, *oldentry = NULL;
8292         acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8293
8294         UNUSED(qtype);
8295
8296         if (type == dns_rdatasetadditional_fromcache)
8297                 return (ISC_R_SUCCESS);
8298
8299         header = (struct rdatasetheader *)(raw - sizeof(*header));
8300
8301         total_count = raw[0] * 256 + raw[1];
8302         INSIST(total_count > current_count);
8303         count = total_count - current_count - 1; /* should be private data */
8304
8305         newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8306         if (newcbarg == NULL)
8307                 return (ISC_R_NOMEMORY);
8308         newcbarg->type = type;
8309         newcbarg->count = count;
8310         newcbarg->header = header;
8311         newcbarg->db = NULL;
8312         dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8313         newcbarg->node = NULL;
8314         dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8315                           &newcbarg->node);
8316         newentry = NULL;
8317         result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8318                                         acache_callback, newcbarg, &newentry);
8319         if (result != ISC_R_SUCCESS)
8320                 goto fail;
8321         /* Set cache data in the new entry. */
8322         result = dns_acache_setentry(acache, newentry, zone, db,
8323                                      version, node, fname);
8324         if (result != ISC_R_SUCCESS)
8325                 goto fail;
8326
8327         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8328         NODE_LOCK(nodelock, isc_rwlocktype_write);
8329
8330         acarray = NULL;
8331         switch (type) {
8332         case dns_rdatasetadditional_fromauth:
8333                 acarray = header->additional_auth;
8334                 break;
8335         case dns_rdatasetadditional_fromglue:
8336                 acarray = header->additional_glue;
8337                 break;
8338         default:
8339                 INSIST(0);
8340         }
8341
8342         if (acarray == NULL) {
8343                 unsigned int i;
8344
8345                 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
8346                                       sizeof(acachectl_t));
8347
8348                 if (acarray == NULL) {
8349                         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8350                         goto fail;
8351                 }
8352
8353                 for (i = 0; i < total_count; i++) {
8354                         acarray[i].entry = NULL;
8355                         acarray[i].cbarg = NULL;
8356                 }
8357         }
8358         switch (type) {
8359         case dns_rdatasetadditional_fromauth:
8360                 header->additional_auth = acarray;
8361                 break;
8362         case dns_rdatasetadditional_fromglue:
8363                 header->additional_glue = acarray;
8364                 break;
8365         default:
8366                 INSIST(0);
8367         }
8368
8369         if (acarray[count].entry != NULL) {
8370                 /*
8371                  * Swap the entry.  Delay cleaning-up the old entry since
8372                  * it would require a node lock.
8373                  */
8374                 oldentry = acarray[count].entry;
8375                 INSIST(acarray[count].cbarg != NULL);
8376                 oldcbarg = acarray[count].cbarg;
8377         }
8378         acarray[count].entry = newentry;
8379         acarray[count].cbarg = newcbarg;
8380
8381         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8382
8383         if (oldentry != NULL) {
8384                 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
8385                 dns_acache_detachentry(&oldentry);
8386         }
8387
8388         return (ISC_R_SUCCESS);
8389
8390  fail:
8391         if (newcbarg != NULL) {
8392                 if (newentry != NULL) {
8393                         acache_cancelentry(rbtdb->common.mctx, newentry,
8394                                            &newcbarg);
8395                         dns_acache_detachentry(&newentry);
8396                 } else {
8397                         dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
8398                         dns_db_detach(&newcbarg->db);
8399                         isc_mem_put(rbtdb->common.mctx, newcbarg,
8400                             sizeof(*newcbarg));
8401                 }
8402         }
8403
8404         return (result);
8405 }
8406
8407 static isc_result_t
8408 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
8409                        dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
8410 {
8411         dns_rbtdb_t *rbtdb = rdataset->private1;
8412         dns_rbtnode_t *rbtnode = rdataset->private2;
8413         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8414         unsigned int current_count = rdataset->privateuint4;
8415         rdatasetheader_t *header;
8416         nodelock_t *nodelock;
8417         unsigned int total_count, count;
8418         acachectl_t *acarray;
8419         dns_acacheentry_t *entry;
8420         acache_cbarg_t *cbarg;
8421
8422         UNUSED(qtype);          /* we do not use this value at least for now */
8423         UNUSED(acache);
8424
8425         if (type == dns_rdatasetadditional_fromcache)
8426                 return (ISC_R_SUCCESS);
8427
8428         header = (struct rdatasetheader *)(raw - sizeof(*header));
8429
8430         total_count = raw[0] * 256 + raw[1];
8431         INSIST(total_count > current_count);
8432         count = total_count - current_count - 1;
8433
8434         acarray = NULL;
8435         entry = NULL;
8436
8437         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8438         NODE_LOCK(nodelock, isc_rwlocktype_write);
8439
8440         switch (type) {
8441         case dns_rdatasetadditional_fromauth:
8442                 acarray = header->additional_auth;
8443                 break;
8444         case dns_rdatasetadditional_fromglue:
8445                 acarray = header->additional_glue;
8446                 break;
8447         default:
8448                 INSIST(0);
8449         }
8450
8451         if (acarray == NULL) {
8452                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8453                 return (ISC_R_NOTFOUND);
8454         }
8455
8456         entry = acarray[count].entry;
8457         if (entry == NULL) {
8458                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8459                 return (ISC_R_NOTFOUND);
8460         }
8461
8462         acarray[count].entry = NULL;
8463         cbarg = acarray[count].cbarg;
8464         acarray[count].cbarg = NULL;
8465
8466         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8467
8468         if (entry != NULL) {
8469                 if (cbarg != NULL)
8470                         acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
8471                 dns_acache_detachentry(&entry);
8472         }
8473
8474         return (ISC_R_SUCCESS);
8475 }
8476
8477 /*%
8478  * Routines for LRU-based cache management.
8479  */
8480
8481 /*%
8482  * See if a given cache entry that is being reused needs to be updated
8483  * in the LRU-list.  From the LRU management point of view, this function is
8484  * expected to return true for almost all cases.  When used with threads,
8485  * however, this may cause a non-negligible performance penalty because a
8486  * writer lock will have to be acquired before updating the list.
8487  * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
8488  * function returns true if the entry has not been updated for some period of
8489  * time.  We differentiate the NS or glue address case and the others since
8490  * experiments have shown that the former tends to be accessed relatively
8491  * infrequently and the cost of cache miss is higher (e.g., a missing NS records
8492  * may cause external queries at a higher level zone, involving more
8493  * transactions).
8494  *
8495  * Caller must hold the node (read or write) lock.
8496  */
8497 static inline isc_boolean_t
8498 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
8499         if ((header->attributes &
8500              (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
8501                 return (ISC_FALSE);
8502
8503 #if DNS_RBTDB_LIMITLRUUPDATE
8504         if (header->type == dns_rdatatype_ns ||
8505             (header->trust == dns_trust_glue &&
8506              (header->type == dns_rdatatype_a ||
8507               header->type == dns_rdatatype_aaaa))) {
8508                 /*
8509                  * Glue records are updated if at least 60 seconds have passed
8510                  * since the previous update time.
8511                  */
8512                 return (header->last_used + 60 <= now);
8513         }
8514
8515         /* Other records are updated if 5 minutes have passed. */
8516         return (header->last_used + 300 <= now);
8517 #else
8518         UNUSED(now);
8519
8520         return (ISC_TRUE);
8521 #endif
8522 }
8523
8524 /*%
8525  * Update the timestamp of a given cache entry and move it to the head
8526  * of the corresponding LRU list.
8527  *
8528  * Caller must hold the node (write) lock.
8529  *
8530  * Note that the we do NOT touch the heap here, as the TTL has not changed.
8531  */
8532 static void
8533 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8534               isc_stdtime_t now)
8535 {
8536         INSIST(IS_CACHE(rbtdb));
8537
8538         /* To be checked: can we really assume this? XXXMLG */
8539         INSIST(ISC_LINK_LINKED(header, link));
8540
8541         ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
8542         header->last_used = now;
8543         ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
8544 }
8545
8546 /*%
8547  * Purge some expired and/or stale (i.e. unused for some period) cache entries
8548  * under an overmem condition.  To recover from this condition quickly, up to
8549  * 2 entries will be purged.  This process is triggered while adding a new
8550  * entry, and we specifically avoid purging entries in the same LRU bucket as
8551  * the one to which the new entry will belong.  Otherwise, we might purge
8552  * entries of the same name of different RR types while adding RRsets from a
8553  * single response (consider the case where we're adding A and AAAA glue records
8554  * of the same NS name).
8555  */
8556 static void
8557 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
8558               isc_stdtime_t now, isc_boolean_t tree_locked)
8559 {
8560         rdatasetheader_t *header, *header_prev;
8561         unsigned int locknum;
8562         int purgecount = 2;
8563
8564         for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
8565              locknum != locknum_start && purgecount > 0;
8566              locknum = (locknum + 1) % rbtdb->node_lock_count) {
8567                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
8568                           isc_rwlocktype_write);
8569
8570                 header = isc_heap_element(rbtdb->heaps[locknum], 1);
8571                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
8572                         expire_header(rbtdb, header, tree_locked);
8573                         purgecount--;
8574                 }
8575
8576                 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
8577                      header != NULL && purgecount > 0;
8578                      header = header_prev) {
8579                         header_prev = ISC_LIST_PREV(header, link);
8580                         /*
8581                          * Unlink the entry at this point to avoid checking it
8582                          * again even if it's currently used someone else and
8583                          * cannot be purged at this moment.  This entry won't be
8584                          * referenced any more (so unlinking is safe) since the
8585                          * TTL was reset to 0.
8586                          */
8587                         ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
8588                                         link);
8589                         expire_header(rbtdb, header, tree_locked);
8590                         purgecount--;
8591                 }
8592
8593                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
8594                                     isc_rwlocktype_write);
8595         }
8596 }
8597
8598 static void
8599 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8600               isc_boolean_t tree_locked)
8601 {
8602         set_ttl(rbtdb, header, 0);
8603         header->attributes |= RDATASET_ATTR_STALE;
8604         header->node->dirty = 1;
8605
8606         /*
8607          * Caller must hold the node (write) lock.
8608          */
8609
8610         if (dns_rbtnode_refcurrent(header->node) == 0) {
8611                 /*
8612                  * If no one else is using the node, we can clean it up now.
8613                  * We first need to gain a new reference to the node to meet a
8614                  * requirement of decrement_reference().
8615                  */
8616                 new_reference(rbtdb, header->node);
8617                 decrement_reference(rbtdb, header->node, 0,
8618                                     isc_rwlocktype_write,
8619                                     tree_locked ? isc_rwlocktype_write :
8620                                     isc_rwlocktype_none, ISC_FALSE);
8621         }
8622 }