]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/bind9/lib/dns/rbtdb.c
MFV of tzdata2010e:
[FreeBSD/FreeBSD.git] / contrib / bind9 / lib / dns / rbtdb.c
1 /*
2  * Copyright (C) 2004-2009  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id: rbtdb.c,v 1.270.12.16 2009/12/30 08:34:30 jinmei Exp $ */
19
20 /*! \file */
21
22 /*
23  * Principal Author: Bob Halley
24  */
25
26 #include <config.h>
27
28 /* #define inline */
29
30 #include <isc/event.h>
31 #include <isc/heap.h>
32 #include <isc/mem.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
41 #include <isc/task.h>
42 #include <isc/time.h>
43 #include <isc/util.h>
44
45 #include <dns/acache.h>
46 #include <dns/db.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
50 #include <dns/lib.h>
51 #include <dns/log.h>
52 #include <dns/masterdump.h>
53 #include <dns/nsec.h>
54 #include <dns/nsec3.h>
55 #include <dns/rbt.h>
56 #include <dns/rdata.h>
57 #include <dns/rdataset.h>
58 #include <dns/rdatasetiter.h>
59 #include <dns/rdataslab.h>
60 #include <dns/rdatastruct.h>
61 #include <dns/result.h>
62 #include <dns/stats.h>
63 #include <dns/view.h>
64 #include <dns/zone.h>
65 #include <dns/zonekey.h>
66
67 #ifdef DNS_RBTDB_VERSION64
68 #include "rbtdb64.h"
69 #else
70 #include "rbtdb.h"
71 #endif
72
73 #ifdef DNS_RBTDB_VERSION64
74 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '8')
75 #else
76 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '4')
77 #endif
78
79 /*%
80  * Note that "impmagic" is not the first four bytes of the struct, so
81  * ISC_MAGIC_VALID cannot be used.
82  */
83 #define VALID_RBTDB(rbtdb)      ((rbtdb) != NULL && \
84                                  (rbtdb)->common.impmagic == RBTDB_MAGIC)
85
86 #ifdef DNS_RBTDB_VERSION64
87 typedef isc_uint64_t                    rbtdb_serial_t;
88 /*%
89  * Make casting easier in symbolic debuggers by using different names
90  * for the 64 bit version.
91  */
92 #define dns_rbtdb_t dns_rbtdb64_t
93 #define rdatasetheader_t rdatasetheader64_t
94 #define rbtdb_version_t rbtdb_version64_t
95 #else
96 typedef isc_uint32_t                    rbtdb_serial_t;
97 #endif
98
99 typedef isc_uint32_t                    rbtdb_rdatatype_t;
100
/*
 * An rbtdb_rdatatype_t packs two 16-bit rdata types into one 32-bit value:
 * the "base" type lives in the low 16 bits and the "ext" type in the high
 * 16 bits.
 *
 * RBTDB_RDATATYPE_BASE(type)  - extract the low 16 bits.
 * RBTDB_RDATATYPE_EXT(type)   - extract the high 16 bits.
 * RBTDB_RDATATYPE_VALUE(b, e) - combine base 'b' and ext 'e'.
 *
 * 'e' is widened to the unsigned 32-bit type *before* it is shifted.
 * Without that, a 16-bit operand is promoted to (signed) int and shifting a
 * value >= 0x8000 left by 16 overflows, which is undefined behaviour.
 */
#define RBTDB_RDATATYPE_BASE(type)      ((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)       ((dns_rdatatype_t)((type) >> 16))
#define RBTDB_RDATATYPE_VALUE(b, e) \
        ((rbtdb_rdatatype_t)(((rbtdb_rdatatype_t)(e) << 16) | (b)))
104
105 #define RBTDB_RDATATYPE_SIGNSEC \
106                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
107 #define RBTDB_RDATATYPE_SIGNSEC3 \
108                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
109 #define RBTDB_RDATATYPE_SIGNS \
110                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
111 #define RBTDB_RDATATYPE_SIGCNAME \
112                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
113 #define RBTDB_RDATATYPE_SIGDNAME \
114                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
115 #define RBTDB_RDATATYPE_NCACHEANY \
116                 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
117
118 /*
119  * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
120  * Using rwlock is effective with regard to lookup performance only when
121  * it is implemented in an efficient way.
122  * Otherwise, it is generally wise to stick to the simple locking since rwlock
123  * would require more memory or can even make lookups slower due to its own
124  * overhead (when it internally calls mutex locks).
125  */
126 #ifdef ISC_RWLOCK_USEATOMIC
127 #define DNS_RBTDB_USERWLOCK 1
128 #else
129 #define DNS_RBTDB_USERWLOCK 0
130 #endif
131
132 #if DNS_RBTDB_USERWLOCK
133 #define RBTDB_INITLOCK(l)       isc_rwlock_init((l), 0, 0)
134 #define RBTDB_DESTROYLOCK(l)    isc_rwlock_destroy(l)
135 #define RBTDB_LOCK(l, t)        RWLOCK((l), (t))
136 #define RBTDB_UNLOCK(l, t)      RWUNLOCK((l), (t))
137 #else
138 #define RBTDB_INITLOCK(l)       isc_mutex_init(l)
139 #define RBTDB_DESTROYLOCK(l)    DESTROYLOCK(l)
140 #define RBTDB_LOCK(l, t)        LOCK(l)
141 #define RBTDB_UNLOCK(l, t)      UNLOCK(l)
142 #endif
143
144 /*
145  * Since node locking is sensitive to both performance and memory footprint,
146  * we need some trick here.  If we have both high-performance rwlock and
147  * high performance and small-memory reference counters, we use rwlock for
148  * node lock and isc_refcount for node references.  In this case, we don't have
149  * to protect the access to the counters by locks.
150  * Otherwise, we simply use ordinary mutex lock for node locking, and use
151  * simple integers as reference counters which is protected by the lock.
152  * In most cases, we can simply use wrapper macros such as NODE_LOCK and
153  * NODE_UNLOCK.  In some other cases, however, we need to protect reference
154  * counters first and then protect other parts of a node as read-only data.
155  * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
156  * provided for these special cases.  When we can use the efficient backend
157  * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
158  * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
159  * section including the access to the reference counter.
160  * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
161  * section is also protected by NODE_STRONGLOCK().
162  */
163 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
164 typedef isc_rwlock_t nodelock_t;
165
166 #define NODE_INITLOCK(l)        isc_rwlock_init((l), 0, 0)
167 #define NODE_DESTROYLOCK(l)     isc_rwlock_destroy(l)
168 #define NODE_LOCK(l, t)         RWLOCK((l), (t))
169 #define NODE_UNLOCK(l, t)       RWUNLOCK((l), (t))
170 #define NODE_TRYUPGRADE(l)      isc_rwlock_tryupgrade(l)
171
172 #define NODE_STRONGLOCK(l)      ((void)0)
173 #define NODE_STRONGUNLOCK(l)    ((void)0)
174 #define NODE_WEAKLOCK(l, t)     NODE_LOCK(l, t)
175 #define NODE_WEAKUNLOCK(l, t)   NODE_UNLOCK(l, t)
176 #define NODE_WEAKDOWNGRADE(l)   isc_rwlock_downgrade(l)
177 #else
178 typedef isc_mutex_t nodelock_t;
179
180 #define NODE_INITLOCK(l)        isc_mutex_init(l)
181 #define NODE_DESTROYLOCK(l)     DESTROYLOCK(l)
182 #define NODE_LOCK(l, t)         LOCK(l)
183 #define NODE_UNLOCK(l, t)       UNLOCK(l)
184 #define NODE_TRYUPGRADE(l)      ISC_R_SUCCESS
185
186 #define NODE_STRONGLOCK(l)      LOCK(l)
187 #define NODE_STRONGUNLOCK(l)    UNLOCK(l)
188 #define NODE_WEAKLOCK(l, t)     ((void)0)
189 #define NODE_WEAKUNLOCK(l, t)   ((void)0)
190 #define NODE_WEAKDOWNGRADE(l)   ((void)0)
191 #endif
192
193 /*%
194  * Whether to rate-limit updating the LRU to avoid possible thread contention.
195  * Our performance measurement has shown the cost is marginal, so it's defined
196  * to be 0 by default either with or without threads.
197  */
198 #ifndef DNS_RBTDB_LIMITLRUUPDATE
199 #define DNS_RBTDB_LIMITLRUUPDATE 0
200 #endif
201
202 /*
203  * Allow clients with a virtual time of up to 5 minutes in the past to see
204  * records that would otherwise have expired.
205  */
206 #define RBTDB_VIRTUAL 300
207
/*
 * Auxiliary record hung off an rdatasetheader through its 'noqname' and
 * 'closest' members.  The fields parallel the name/neg/negsig arguments of
 * rdataset_getnoqname() and rdataset_getclosest().
 * NOTE(review): 'neg' and 'negsig' are opaque pointers here -- presumably
 * raw rdata slabs for the negative proof and its signature; confirm.
 */
208 struct noqname {
209         dns_name_t      name;
210         void *          neg;
211         void *          negsig;
212         dns_rdatatype_t type;
213 };
214
215 typedef struct acachectl acachectl_t;
216
217 typedef struct rdatasetheader {
218         /*%
219          * Locked by the owning node's lock.
220          */
221         rbtdb_serial_t                  serial;
222         dns_ttl_t                       rdh_ttl;
223         rbtdb_rdatatype_t               type;
224         isc_uint16_t                    attributes;
225         dns_trust_t                     trust;
226         struct noqname                  *noqname;
227         struct noqname                  *closest;
228         /*%<
229          * We don't use the LIST macros, because the LIST structure has
230          * both head and tail pointers, and is doubly linked.
231          */
232
233         struct rdatasetheader           *next;
234         /*%<
235          * If this is the top header for an rdataset, 'next' points
236          * to the top header for the next rdataset (i.e., the next type).
237          * Otherwise, it points up to the header whose down pointer points
238          * at this header.
239          */
240
241         struct rdatasetheader           *down;
242         /*%<
243          * Points to the header for the next older version of
244          * this rdataset.
245          */
246
247         isc_uint32_t                    count;
248         /*%<
249          * Monotonically increased every time this rdataset is bound so that
250          * it is used as the base of the starting point in DNS responses
251          * when the "cyclic" rrset-order is required.  Since the ordering
252          * should not be so crucial, no lock is set for the counter for
253          * performance reasons.
254          */
255
256         acachectl_t                     *additional_auth;
257         acachectl_t                     *additional_glue;
258
259         dns_rbtnode_t                   *node;
260         isc_stdtime_t                   last_used;
261         ISC_LINK(struct rdatasetheader) link;
262
263         unsigned int                    heap_index;
264         /*%<
265          * Used for TTL-based cache cleaning.
266          */
267         isc_stdtime_t                   resign;
268 } rdatasetheader_t;
269
270 typedef ISC_LIST(rdatasetheader_t)      rdatasetheaderlist_t;
271 typedef ISC_LIST(dns_rbtnode_t)         rbtnodelist_t;
272
273 #define RDATASET_ATTR_NONEXISTENT       0x0001
274 #define RDATASET_ATTR_STALE             0x0002
275 #define RDATASET_ATTR_IGNORE            0x0004
276 #define RDATASET_ATTR_RETAIN            0x0008
277 #define RDATASET_ATTR_NXDOMAIN          0x0010
278 #define RDATASET_ATTR_RESIGN            0x0020
279 #define RDATASET_ATTR_STATCOUNT         0x0040
280 #define RDATASET_ATTR_OPTOUT            0x0080
281
/*
 * Argument bundle pointed to by acachectl.cbarg.  It records the kind of
 * additional data ('type'), a counter, and the db/node/header it refers to.
 * NOTE(review): by its name this is the callback argument registered with an
 * additional-data cache entry, and 'count' is presumably the index of the
 * rdata within the rdataset -- confirm against the code that fills it in.
 */
282 typedef struct acache_cbarg {
283         dns_rdatasetadditional_t        type;
284         unsigned int                    count;
285         dns_db_t                        *db;
286         dns_dbnode_t                    *node;
287         rdatasetheader_t                *header;
288 } acache_cbarg_t;
289
/*
 * Pairs an acache entry with its acache_cbarg_t.  rdatasetheader_t holds
 * pointers to objects of this type in its 'additional_auth' and
 * 'additional_glue' members.
 */
290 struct acachectl {
291         dns_acacheentry_t               *entry;
292         acache_cbarg_t                  *cbarg;
293 };
294
295 /*
296  * XXX
297  * When the cache will pre-expire data (due to memory low or other
298  * situations) before the rdataset's TTL has expired, it MUST
299  * respect the RETAIN bit and not expire the data until its TTL is
300  * expired.
301  */
302
303 #undef IGNORE                   /* WIN32 winbase.h defines this. */
304
305 #define EXISTS(header) \
306         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
307 #define NONEXISTENT(header) \
308         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
309 #define IGNORE(header) \
310         (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
311 #define RETAIN(header) \
312         (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
313 #define NXDOMAIN(header) \
314         (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
315 #define RESIGN(header) \
316         (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
317 #define OPTOUT(header) \
318         (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
319
320 #define DEFAULT_NODE_LOCK_COUNT         7       /*%< Should be prime. */
321
322 /*%
323  * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
324  * There is a tradeoff issue about configuring this value: if this is too
325  * small, it may cause heavier contention between threads; if this is too large,
326  * LRU purge algorithm won't work well (entries tend to be purged prematurely).
327  * The default value should work well for most environments, but this can
328  * also be configurable at compilation time via the
329  * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable.  This value must be larger than
330  * 1 due to the assumption of overmem_purge().
331  */
332 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
333 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
334 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
335 #else
336 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
337 #endif
338 #else
339 #define DEFAULT_CACHE_NODE_LOCK_COUNT   16
340 #endif  /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
341
342 typedef struct {
343         nodelock_t                      lock;
344         /* Protected in the refcount routines. */
345         isc_refcount_t                  references;
346         /* Locked by lock. */
347         isc_boolean_t                   exiting;
348 } rbtdb_nodelock_t;
349
/*
 * Element of an rbtdb_changedlist_t (see rbtdb_version_t.changed_list):
 * a node pointer plus a 'dirty' flag, linked through 'link'.
 * NOTE(review): presumably one entry per node touched while a version is
 * open; confirm against the code that populates changed_list.
 */
350 typedef struct rbtdb_changed {
351         dns_rbtnode_t *                 node;
352         isc_boolean_t                   dirty;
353         ISC_LINK(struct rbtdb_changed)  link;
354 } rbtdb_changed_t;
355
356 typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
357
/*
 * Value type of rbtdb_version_t.secure.
 * NOTE(review): presumably the DNSSEC status of a zone version (unsigned,
 * partially signed, fully signed); confirm where 'secure' is assigned.
 */
358 typedef enum {
359         dns_db_insecure,
360         dns_db_partial,
361         dns_db_secure
362 } dns_db_secure_t;
363
364 typedef struct rbtdb_version {
365         /* Not locked */
366         rbtdb_serial_t                  serial;
367         /*
368          * Protected in the refcount routines.
369          * XXXJT: should we change the lock policy based on the refcount
370          * performance?
371          */
372         isc_refcount_t                  references;
373         /* Locked by database lock. */
374         isc_boolean_t                   writer;
375         isc_boolean_t                   commit_ok;
376         rbtdb_changedlist_t             changed_list;
377         rdatasetheaderlist_t            resigned_list;
378         ISC_LINK(struct rbtdb_version)  link;
379         dns_db_secure_t                 secure;
380         isc_boolean_t                   havensec3;
381         /* NSEC3 parameters */
382         dns_hash_t                      hash;
383         isc_uint8_t                     flags;
384         isc_uint16_t                    iterations;
385         isc_uint8_t                     salt_length;
386         unsigned char                   salt[DNS_NSEC3_SALTSIZE];
387 } rbtdb_version_t;
388
389 typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;
390
391 typedef struct {
392         /* Unlocked. */
393         dns_db_t                        common;
394 #if DNS_RBTDB_USERWLOCK
395         isc_rwlock_t                    lock;
396 #else
397         isc_mutex_t                     lock;
398 #endif
399         isc_rwlock_t                    tree_lock;
400         unsigned int                    node_lock_count;
401         rbtdb_nodelock_t *              node_locks;
402         dns_rbtnode_t *                 origin_node;
403         dns_stats_t *                   rrsetstats; /* cache DB only */
404         /* Locked by lock. */
405         unsigned int                    active;
406         isc_refcount_t                  references;
407         unsigned int                    attributes;
408         rbtdb_serial_t                  current_serial;
409         rbtdb_serial_t                  least_serial;
410         rbtdb_serial_t                  next_serial;
411         rbtdb_version_t *               current_version;
412         rbtdb_version_t *               future_version;
413         rbtdb_versionlist_t             open_versions;
414         isc_boolean_t                   overmem;
415         isc_task_t *                    task;
416         dns_dbnode_t                    *soanode;
417         dns_dbnode_t                    *nsnode;
418
419         /*
420          * This is a linked list used to implement the LRU cache.  There will
421          * be node_lock_count linked lists here.  Nodes in bucket 1 will be
422          * placed on the linked list rdatasets[1].
423          */
424         rdatasetheaderlist_t            *rdatasets;
425
426         /*%
427          * Temporary storage for stale cache nodes and dynamically deleted
428          * nodes that await being cleaned up.
429          */
430         rbtnodelist_t                   *deadnodes;
431
432         /*
433          * Heaps.  Each of these is used for TTL based expiry.
434          */
435         isc_heap_t                      **heaps;
436
437         /* Locked by tree_lock. */
438         dns_rbt_t *                     tree;
439         dns_rbt_t *                     nsec3;
440
441         /* Unlocked */
442         unsigned int                    quantum;
443 } dns_rbtdb_t;
444
445 #define RBTDB_ATTR_LOADED               0x01
446 #define RBTDB_ATTR_LOADING              0x02
447
448 /*%
449  * Search Context
450  */
451 typedef struct {
452         dns_rbtdb_t *           rbtdb;
453         rbtdb_version_t *       rbtversion;
454         rbtdb_serial_t          serial;
455         unsigned int            options;
456         dns_rbtnodechain_t      chain;
457         isc_boolean_t           copy_name;
458         isc_boolean_t           need_cleanup;
459         isc_boolean_t           wild;
460         dns_rbtnode_t *         zonecut;
461         rdatasetheader_t *      zonecut_rdataset;
462         rdatasetheader_t *      zonecut_sigrdataset;
463         dns_fixedname_t         zonecut_name;
464         isc_stdtime_t           now;
465 } rbtdb_search_t;
466
467 /*%
468  * Load Context
469  */
470 typedef struct {
471         dns_rbtdb_t *           rbtdb;
472         isc_stdtime_t           now;
473 } rbtdb_load_t;
474
475 static void rdataset_disassociate(dns_rdataset_t *rdataset);
476 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
477 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
478 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
479 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
480 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
481 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
482                                         dns_name_t *name,
483                                         dns_rdataset_t *neg,
484                                         dns_rdataset_t *negsig);
485 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
486                                         dns_name_t *name,
487                                         dns_rdataset_t *neg,
488                                         dns_rdataset_t *negsig);
489 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
490                                            dns_rdatasetadditional_t type,
491                                            dns_rdatatype_t qtype,
492                                            dns_acache_t *acache,
493                                            dns_zone_t **zonep,
494                                            dns_db_t **dbp,
495                                            dns_dbversion_t **versionp,
496                                            dns_dbnode_t **nodep,
497                                            dns_name_t *fname,
498                                            dns_message_t *msg,
499                                            isc_stdtime_t now);
500 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
501                                            dns_rdatasetadditional_t type,
502                                            dns_rdatatype_t qtype,
503                                            dns_acache_t *acache,
504                                            dns_zone_t *zone,
505                                            dns_db_t *db,
506                                            dns_dbversion_t *version,
507                                            dns_dbnode_t *node,
508                                            dns_name_t *fname);
509 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
510                                            dns_rdataset_t *rdataset,
511                                            dns_rdatasetadditional_t type,
512                                            dns_rdatatype_t qtype);
513 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
514                                               isc_stdtime_t now);
515 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
516                           isc_stdtime_t now);
517 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
518                           isc_boolean_t tree_locked);
519 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
520                           isc_stdtime_t now, isc_boolean_t tree_locked);
521 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
522                                   rdatasetheader_t *newheader);
523 static void prune_tree(isc_task_t *task, isc_event_t *event);
524
/*
 * Method table through which generic rdataset code reaches the static
 * rdataset_*() implementations in this file.  The initializer is positional,
 * so the order must match the member order of dns_rdatasetmethods_t.
 * The two NULL entries leave the slots just before getnoqname and
 * getclosest without an implementation.
 * NOTE(review): those are presumably the "add" counterparts of getnoqname
 * and getclosest; confirm against the dns_rdatasetmethods_t declaration.
 */
525 static dns_rdatasetmethods_t rdataset_methods = {
526         rdataset_disassociate,
527         rdataset_first,
528         rdataset_next,
529         rdataset_current,
530         rdataset_clone,
531         rdataset_count,
532         NULL,
533         rdataset_getnoqname,
534         NULL,
535         rdataset_getclosest,
536         rdataset_getadditional,
537         rdataset_setadditional,
538         rdataset_putadditional
539 };
540
541 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
542 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
543 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
544 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
545                                  dns_rdataset_t *rdataset);
546
547 static dns_rdatasetitermethods_t rdatasetiter_methods = {
548         rdatasetiter_destroy,
549         rdatasetiter_first,
550         rdatasetiter_next,
551         rdatasetiter_current
552 };
553
/*
 * rbtdb's rdataset iterator: the generic dns_rdatasetiter_t comes first,
 * followed by a pointer to an rdatasetheader_t.
 * NOTE(review): 'current' is presumably the header the iterator is
 * positioned on; confirm in rdatasetiter_first()/rdatasetiter_next().
 */
554 typedef struct rbtdb_rdatasetiter {
555         dns_rdatasetiter_t              common;
556         rdatasetheader_t *              current;
557 } rbtdb_rdatasetiter_t;
558
559 static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
560 static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
561 static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
562 static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
563                                         dns_name_t *name);
564 static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
565 static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
566 static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
567                                            dns_dbnode_t **nodep,
568                                            dns_name_t *name);
569 static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
570 static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
571                                           dns_name_t *name);
572
573 static dns_dbiteratormethods_t dbiterator_methods = {
574         dbiterator_destroy,
575         dbiterator_first,
576         dbiterator_last,
577         dbiterator_seek,
578         dbiterator_prev,
579         dbiterator_next,
580         dbiterator_current,
581         dbiterator_pause,
582         dbiterator_origin
583 };
584
585 #define DELETION_BATCH_MAX 64
586
587 /*
588  * If 'paused' is ISC_TRUE, then the tree lock is not being held.
589  */
590 typedef struct rbtdb_dbiterator {
591         dns_dbiterator_t                common;
592         isc_boolean_t                   paused;
593         isc_boolean_t                   new_origin;
594         isc_rwlocktype_t                tree_locked;
595         isc_result_t                    result;
596         dns_fixedname_t                 name;
597         dns_fixedname_t                 origin;
598         dns_rbtnodechain_t              chain;
599         dns_rbtnodechain_t              nsec3chain;
600         dns_rbtnodechain_t              *current;
601         dns_rbtnode_t                   *node;
602         dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX];
603         int                             delete;
604         isc_boolean_t                   nsec3only;
605         isc_boolean_t                   nonsec3;
606 } rbtdb_dbiterator_t;
607
608
609 #define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
610 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
611
612 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
613                        isc_event_t *event);
614 static void overmem(dns_db_t *db, isc_boolean_t overmem);
615 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
616                                isc_boolean_t *nsec3createflag);
617
618 /*%
619  * 'init_count' is used to initialize 'newheader->count' which in turn
620  * is used to determine where in the cycle rrset-order cyclic starts.
621  * We don't lock this as we don't care about simultaneous updates.
622  *
623  * Note:
624  *      Both init_count and header->count can be ISC_UINT32_MAX.
625  *      The count on the returned rdataset however can't be as
626  *      that indicates that the database does not implement cyclic
627  *      processing.
628  */
629 static unsigned int init_count;
630
631 /*
632  * Locking
633  *
634  * If a routine is going to lock more than one lock in this module, then
635  * the locking must be done in the following order:
636  *
637  *      Tree Lock
638  *
639  *      Node Lock       (Only one from the set may be locked at one time by
640  *                       any caller)
641  *
642  *      Database Lock
643  *
644  * Failure to follow this hierarchy can result in deadlock.
645  */
646
647 /*
648  * Deleting Nodes
649  *
650  * For zone databases the node for the origin of the zone MUST NOT be deleted.
651  */
652
653
654 /*
655  * DB Routines
656  */
657
658 static void
659 attach(dns_db_t *source, dns_db_t **targetp) {
660         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
661
662         REQUIRE(VALID_RBTDB(rbtdb));
663
664         isc_refcount_increment(&rbtdb->references, NULL);
665
666         *targetp = source;
667 }
668
669 static void
670 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
671         dns_rbtdb_t *rbtdb = event->ev_arg;
672
673         UNUSED(task);
674
675         free_rbtdb(rbtdb, ISC_TRUE, event);
676 }
677
678 static void
679 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
680                   isc_boolean_t increment)
681 {
682         dns_rdatastatstype_t statattributes = 0;
683         dns_rdatastatstype_t base = 0;
684         dns_rdatastatstype_t type;
685
686         /* At the moment we count statistics only for cache DB */
687         INSIST(IS_CACHE(rbtdb));
688
689         if (NXDOMAIN(header))
690                 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
691         else if (RBTDB_RDATATYPE_BASE(header->type) == 0) {
692                 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
693                 base = RBTDB_RDATATYPE_EXT(header->type);
694         } else
695                 base = RBTDB_RDATATYPE_BASE(header->type);
696
697         type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
698         if (increment)
699                 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
700         else
701                 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
702 }
703
704 static void
705 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
706         int idx;
707         isc_heap_t *heap;
708         dns_ttl_t oldttl;
709
710         oldttl = header->rdh_ttl;
711         header->rdh_ttl = newttl;
712
713         if (!IS_CACHE(rbtdb))
714                 return;
715
716         /*
717          * It's possible the rbtdb is not a cache.  If this is the case,
718          * we will not have a heap, and we move on.  If we do, though,
719          * we might need to adjust things.
720          */
721         if (header->heap_index == 0 || newttl == oldttl)
722                 return;
723         idx = header->node->locknum;
724         if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
725             return;
726         heap = rbtdb->heaps[idx];
727
728         if (newttl < oldttl)
729                 isc_heap_increased(heap, header->heap_index);
730         else
731                 isc_heap_decreased(heap, header->heap_index);
732 }
733
734 /*%
735  * These functions allow the heap code to rank the priority of each
736  * element.  It returns ISC_TRUE if v1 happens "sooner" than v2.
737  */
738 static isc_boolean_t
739 ttl_sooner(void *v1, void *v2) {
740         rdatasetheader_t *h1 = v1;
741         rdatasetheader_t *h2 = v2;
742
743         if (h1->rdh_ttl < h2->rdh_ttl)
744                 return (ISC_TRUE);
745         return (ISC_FALSE);
746 }
747
748 static isc_boolean_t
749 resign_sooner(void *v1, void *v2) {
750         rdatasetheader_t *h1 = v1;
751         rdatasetheader_t *h2 = v2;
752
753         if (h1->resign < h2->resign)
754                 return (ISC_TRUE);
755         return (ISC_FALSE);
756 }
757
758 /*%
759  * This function sets the heap index into the header.
760  */
761 static void
762 set_index(void *what, unsigned int index) {
763         rdatasetheader_t *h = what;
764
765         h->heap_index = index;
766 }
767
768 /*%
769  * Work out how many nodes can be deleted in the time between two
770  * requests to the nameserver.  Smooth the resulting number and use it
771  * as a estimate for the number of nodes to be deleted in the next
772  * iteration.
773  */
774 static unsigned int
775 adjust_quantum(unsigned int old, isc_time_t *start) {
776         unsigned int pps = dns_pps;     /* packets per second */
777         unsigned int interval;
778         isc_uint64_t usecs;
779         isc_time_t end;
780         unsigned int new;
781
782         if (pps < 100)
783                 pps = 100;
784         isc_time_now(&end);
785
786         interval = 1000000 / pps;       /* interval in usec */
787         if (interval == 0)
788                 interval = 1;
789         usecs = isc_time_microdiff(&end, start);
790         if (usecs == 0) {
791                 /*
792                  * We were unable to measure the amount of time taken.
793                  * Double the nodes deleted next time.
794                  */
795                 old *= 2;
796                 if (old > 1000)
797                         old = 1000;
798                 return (old);
799         }
800         new = old * interval;
801         new /= (unsigned int)usecs;
802         if (new == 0)
803                 new = 1;
804         else if (new > 1000)
805                 new = 1000;
806
807         /* Smooth */
808         new = (new + old * 3) / 4;
809
810         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
811                       ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
812
813         return (new);
814 }
815
/*%
 * Tear down 'rbtdb' and release its memory.
 *
 * 'log' selects whether a "done free_rbtdb(...)" debug message is written
 * once the trees are gone.  'event' is NULL on the calls visible in this
 * section; when it is non-NULL it is reused if the work has to be
 * rescheduled again and is freed once both trees have been destroyed.
 *
 * The two trees (rbtdb->tree and rbtdb->nsec3) are destroyed with
 * dns_rbt_destroy2(), passing rbtdb->quantum.  If that call returns
 * ISC_R_QUOTA, a DNS_EVENT_FREESTORAGE event naming free_rbtdb_callback
 * is sent to rbtdb->task and this function returns early, leaving the rest
 * of the database allocated.  NOTE(review): free_rbtdb_callback is not
 * visible here; presumably it calls back into this function -- confirm.
 */
816 static void
817 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
818         unsigned int i;
819         isc_ondestroy_t ondest;
820         isc_result_t result;
821         char buf[DNS_NAME_FORMATSIZE];
822         isc_time_t start;
823
824         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
825                 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
826
827         REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
828         REQUIRE(rbtdb->future_version == NULL);
829
            /*
             * Drop the database's own reference on the current version; it
             * must be the last one (refs == 0) before the version is freed.
             */
830         if (rbtdb->current_version != NULL) {
831                 unsigned int refs;
832
833                 isc_refcount_decrement(&rbtdb->current_version->references,
834                                        &refs);
835                 INSIST(refs == 0);
836                 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
837                 isc_refcount_destroy(&rbtdb->current_version->references);
838                 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
839                             sizeof(rbtdb_version_t));
840         }
841
842         /*
843          * We assume the number of remaining dead nodes is reasonably small;
844          * the overhead of unlinking all nodes here should be negligible.
845          */
846         for (i = 0; i < rbtdb->node_lock_count; i++) {
847                 dns_rbtnode_t *node;
848
849                 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
850                 while (node != NULL) {
851                         ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
852                         node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
853                 }
854         }
855
            /*
             * No rescheduling event yet: choose the quantum handed to
             * dns_rbt_destroy2() -- 100 when a task exists, 0 otherwise.
             */
856         if (event == NULL)
857                 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
858  again:
859         if (rbtdb->tree != NULL) {
860                 isc_time_now(&start);
861                 result = dns_rbt_destroy2(&rbtdb->tree, rbtdb->quantum);
862                 if (result == ISC_R_QUOTA) {
863                         INSIST(rbtdb->task != NULL);
864                         if (rbtdb->quantum != 0)
865                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
866                                                                 &start);
867                         if (event == NULL)
868                                 event = isc_event_allocate(rbtdb->common.mctx,
869                                                            NULL,
870                                                          DNS_EVENT_FREESTORAGE,
871                                                            free_rbtdb_callback,
872                                                            rbtdb,
873                                                            sizeof(isc_event_t));
                            /*
                             * No event could be allocated, so the work cannot
                             * be handed to the task; go around again and keep
                             * destroying from within this call instead.
                             */
874                         if (event == NULL)
875                                 goto again;
876                         isc_task_send(rbtdb->task, &event);
877                         return;
878                 }
879                 INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
880         }
881
            /*
             * Same procedure for the NSEC3 tree.  Note that 'again' re-tests
             * rbtdb->tree first, which is NULL by the time this is reached.
             */
882         if (rbtdb->nsec3 != NULL) {
883                 isc_time_now(&start);
884                 result = dns_rbt_destroy2(&rbtdb->nsec3, rbtdb->quantum);
885                 if (result == ISC_R_QUOTA) {
886                         INSIST(rbtdb->task != NULL);
887                         if (rbtdb->quantum != 0)
888                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
889                                                                 &start);
890                         if (event == NULL)
891                                 event = isc_event_allocate(rbtdb->common.mctx,
892                                                            NULL,
893                                                          DNS_EVENT_FREESTORAGE,
894                                                            free_rbtdb_callback,
895                                                            rbtdb,
896                                                            sizeof(isc_event_t));
897                         if (event == NULL)
898                                 goto again;
899                         isc_task_send(rbtdb->task, &event);
900                         return;
901                 }
902                 INSIST(result == ISC_R_SUCCESS && rbtdb->nsec3 == NULL);
903         }
904
            /*
             * Both trees are gone; the rescheduling event, if there is one,
             * is no longer needed.
             */
905         if (event != NULL)
906                 isc_event_free(&event);
907         if (log) {
908                 if (dns_name_dynamic(&rbtdb->common.origin))
909                         dns_name_format(&rbtdb->common.origin, buf,
910                                         sizeof(buf));
911                 else
912                         strcpy(buf, "<UNKNOWN>");
913                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
914                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
915                               "done free_rbtdb(%s)", buf);
916         }
917         if (dns_name_dynamic(&rbtdb->common.origin))
918                 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
919         for (i = 0; i < rbtdb->node_lock_count; i++) {
920                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
921                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
922         }
923
924         /*
925          * Clean up LRU / re-signing order lists.
926          */
927         if (rbtdb->rdatasets != NULL) {
928                 for (i = 0; i < rbtdb->node_lock_count; i++)
929                         INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
930                 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
931                             rbtdb->node_lock_count *
932                             sizeof(rdatasetheaderlist_t));
933         }
934         /*
935          * Clean up dead node buckets.
936          */
937         if (rbtdb->deadnodes != NULL) {
938                 for (i = 0; i < rbtdb->node_lock_count; i++)
939                         INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
940                 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
941                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
942         }
943         /*
944          * Clean up heap objects.
945          */
946         if (rbtdb->heaps != NULL) {
947                 for (i = 0; i < rbtdb->node_lock_count; i++)
948                         isc_heap_destroy(&rbtdb->heaps[i]);
949                 isc_mem_put(rbtdb->common.mctx, rbtdb->heaps,
950                             rbtdb->node_lock_count *
951                             sizeof(isc_heap_t *));
952         }
953
954         if (rbtdb->rrsetstats != NULL)
955                 dns_stats_detach(&rbtdb->rrsetstats);
956
957         isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
958                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
959         isc_rwlock_destroy(&rbtdb->tree_lock);
960         isc_refcount_destroy(&rbtdb->references);
961         if (rbtdb->task != NULL)
962                 isc_task_detach(&rbtdb->task);
963
964         RBTDB_DESTROYLOCK(&rbtdb->lock);
965         rbtdb->common.magic = 0;
966         rbtdb->common.impmagic = 0;
            /*
             * Copy 'ondest' out first: the rbtdb structure is handed back to
             * the memory context by the next call, after which 'rbtdb' is
             * only passed along as a pointer value.
             */
967         ondest = rbtdb->common.ondest;
968         isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
969         isc_ondestroy_notify(&ondest, rbtdb);
970 }
971
972 static inline void
973 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
974         isc_boolean_t want_free = ISC_FALSE;
975         unsigned int i;
976         unsigned int inactive = 0;
977
978         /* XXX check for open versions here */
979
980         if (rbtdb->soanode != NULL)
981                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
982         if (rbtdb->nsnode != NULL)
983                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
984
985         /*
986          * Even though there are no external direct references, there still
987          * may be nodes in use.
988          */
989         for (i = 0; i < rbtdb->node_lock_count; i++) {
990                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
991                 rbtdb->node_locks[i].exiting = ISC_TRUE;
992                 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
993                 if (isc_refcount_current(&rbtdb->node_locks[i].references)
994                     == 0) {
995                         inactive++;
996                 }
997         }
998
999         if (inactive != 0) {
1000                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1001                 rbtdb->active -= inactive;
1002                 if (rbtdb->active == 0)
1003                         want_free = ISC_TRUE;
1004                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1005                 if (want_free) {
1006                         char buf[DNS_NAME_FORMATSIZE];
1007                         if (dns_name_dynamic(&rbtdb->common.origin))
1008                                 dns_name_format(&rbtdb->common.origin, buf,
1009                                                 sizeof(buf));
1010                         else
1011                                 strcpy(buf, "<UNKNOWN>");
1012                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1013                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1014                                       "calling free_rbtdb(%s)", buf);
1015                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
1016                 }
1017         }
1018 }
1019
1020 static void
1021 detach(dns_db_t **dbp) {
1022         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1023         unsigned int refs;
1024
1025         REQUIRE(VALID_RBTDB(rbtdb));
1026
1027         isc_refcount_decrement(&rbtdb->references, &refs);
1028
1029         if (refs == 0)
1030                 maybe_free_rbtdb(rbtdb);
1031
1032         *dbp = NULL;
1033 }
1034
1035 static void
1036 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1037         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1038         rbtdb_version_t *version;
1039         unsigned int refs;
1040
1041         REQUIRE(VALID_RBTDB(rbtdb));
1042
1043         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1044         version = rbtdb->current_version;
1045         isc_refcount_increment(&version->references, &refs);
1046         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1047
1048         *versionp = (dns_dbversion_t *)version;
1049 }
1050
1051 static inline rbtdb_version_t *
1052 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1053                  unsigned int references, isc_boolean_t writer)
1054 {
1055         isc_result_t result;
1056         rbtdb_version_t *version;
1057
1058         version = isc_mem_get(mctx, sizeof(*version));
1059         if (version == NULL)
1060                 return (NULL);
1061         version->serial = serial;
1062         result = isc_refcount_init(&version->references, references);
1063         if (result != ISC_R_SUCCESS) {
1064                 isc_mem_put(mctx, version, sizeof(*version));
1065                 return (NULL);
1066         }
1067         version->writer = writer;
1068         version->commit_ok = ISC_FALSE;
1069         ISC_LIST_INIT(version->changed_list);
1070         ISC_LIST_INIT(version->resigned_list);
1071         ISC_LINK_INIT(version, link);
1072
1073         return (version);
1074 }
1075
1076 static isc_result_t
1077 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1078         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1079         rbtdb_version_t *version;
1080
1081         REQUIRE(VALID_RBTDB(rbtdb));
1082         REQUIRE(versionp != NULL && *versionp == NULL);
1083         REQUIRE(rbtdb->future_version == NULL);
1084
1085         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1086         RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
1087         version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1088                                    ISC_TRUE);
1089         if (version != NULL) {
1090                 version->commit_ok = ISC_TRUE;
1091                 version->secure = rbtdb->current_version->secure;
1092                 version->havensec3 = rbtdb->current_version->havensec3;
1093                 if (version->havensec3) {
1094                         version->flags = rbtdb->current_version->flags;
1095                         version->iterations =
1096                                 rbtdb->current_version->iterations;
1097                         version->hash = rbtdb->current_version->hash;
1098                         version->salt_length =
1099                                 rbtdb->current_version->salt_length;
1100                         memcpy(version->salt, rbtdb->current_version->salt,
1101                                version->salt_length);
1102                 } else {
1103                         version->flags = 0;
1104                         version->iterations = 0;
1105                         version->hash = 0;
1106                         version->salt_length = 0;
1107                         memset(version->salt, 0, sizeof(version->salt));
1108                 }
1109                 rbtdb->next_serial++;
1110                 rbtdb->future_version = version;
1111         }
1112         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1113
1114         if (version == NULL)
1115                 return (ISC_R_NOMEMORY);
1116
1117         *versionp = version;
1118
1119         return (ISC_R_SUCCESS);
1120 }
1121
1122 static void
1123 attachversion(dns_db_t *db, dns_dbversion_t *source,
1124               dns_dbversion_t **targetp)
1125 {
1126         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1127         rbtdb_version_t *rbtversion = source;
1128         unsigned int refs;
1129
1130         REQUIRE(VALID_RBTDB(rbtdb));
1131
1132         isc_refcount_increment(&rbtversion->references, &refs);
1133         INSIST(refs > 1);
1134
1135         *targetp = rbtversion;
1136 }
1137
1138 static rbtdb_changed_t *
1139 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1140             dns_rbtnode_t *node)
1141 {
1142         rbtdb_changed_t *changed;
1143         unsigned int refs;
1144
1145         /*
1146          * Caller must be holding the node lock if its reference must be
1147          * protected by the lock.
1148          */
1149
1150         changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1151
1152         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1153
1154         REQUIRE(version->writer);
1155
1156         if (changed != NULL) {
1157                 dns_rbtnode_refincrement(node, &refs);
1158                 INSIST(refs != 0);
1159                 changed->node = node;
1160                 changed->dirty = ISC_FALSE;
1161                 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1162         } else
1163                 version->commit_ok = ISC_FALSE;
1164
1165         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1166
1167         return (changed);
1168 }
1169
1170 static void
1171 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1172                  acachectl_t *array)
1173 {
1174         unsigned int count;
1175         unsigned int i;
1176         unsigned char *raw;     /* RDATASLAB */
1177
1178         /*
1179          * The caller must be holding the corresponding node lock.
1180          */
1181
1182         if (array == NULL)
1183                 return;
1184
1185         raw = (unsigned char *)header + sizeof(*header);
1186         count = raw[0] * 256 + raw[1];
1187
1188         /*
1189          * Sanity check: since an additional cache entry has a reference to
1190          * the original DB node (in the callback arg), there should be no
1191          * acache entries when the node can be freed.
1192          */
1193         for (i = 0; i < count; i++)
1194                 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1195
1196         isc_mem_put(mctx, array, count * sizeof(acachectl_t));
1197 }
1198
1199 static inline void
1200 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1201
1202         if (dns_name_dynamic(&(*noqname)->name))
1203                 dns_name_free(&(*noqname)->name, mctx);
1204         if ((*noqname)->neg != NULL)
1205                 isc_mem_put(mctx, (*noqname)->neg,
1206                             dns_rdataslab_size((*noqname)->neg, 0));
1207         if ((*noqname)->negsig != NULL)
1208                 isc_mem_put(mctx, (*noqname)->negsig,
1209                             dns_rdataslab_size((*noqname)->negsig, 0));
1210         isc_mem_put(mctx, *noqname, sizeof(**noqname));
1211         *noqname = NULL;
1212 }
1213
1214 static inline void
1215 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1216 {
1217         ISC_LINK_INIT(h, link);
1218         h->heap_index = 0;
1219
1220 #if TRACE_HEADER
1221         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1222                 fprintf(stderr, "initialized header: %p\n", h);
1223 #else
1224         UNUSED(rbtdb);
1225 #endif
1226 }
1227
1228 static inline rdatasetheader_t *
1229 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1230 {
1231         rdatasetheader_t *h;
1232
1233         h = isc_mem_get(mctx, sizeof(*h));
1234         if (h == NULL)
1235                 return (NULL);
1236
1237 #if TRACE_HEADER
1238         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1239                 fprintf(stderr, "allocated header: %p\n", h);
1240 #endif
1241         init_rdataset(rbtdb, h);
1242         return (h);
1243 }
1244
1245 static inline void
1246 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1247 {
1248         unsigned int size;
1249         int idx;
1250
1251         if (EXISTS(rdataset) &&
1252             (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1253                 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1254         }
1255
1256         idx = rdataset->node->locknum;
1257         if (ISC_LINK_LINKED(rdataset, link)) {
1258                 INSIST(IS_CACHE(rbtdb));
1259                 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1260         }
1261         if (rdataset->heap_index != 0)
1262                 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1263         rdataset->heap_index = 0;
1264
1265         if (rdataset->noqname != NULL)
1266                 free_noqname(mctx, &rdataset->noqname);
1267         if (rdataset->closest != NULL)
1268                 free_noqname(mctx, &rdataset->closest);
1269
1270         free_acachearray(mctx, rdataset, rdataset->additional_auth);
1271         free_acachearray(mctx, rdataset, rdataset->additional_glue);
1272
1273         if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1274                 size = sizeof(*rdataset);
1275         else
1276                 size = dns_rdataslab_size((unsigned char *)rdataset,
1277                                           sizeof(*rdataset));
1278         isc_mem_put(mctx, rdataset, size);
1279 }
1280
1281 static inline void
1282 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1283         rdatasetheader_t *header, *dcurrent;
1284         isc_boolean_t make_dirty = ISC_FALSE;
1285
1286         /*
1287          * Caller must hold the node lock.
1288          */
1289
1290         /*
1291          * We set the IGNORE attribute on rdatasets with serial number
1292          * 'serial'.  When the reference count goes to zero, these rdatasets
1293          * will be cleaned up; until that time, they will be ignored.
1294          */
1295         for (header = node->data; header != NULL; header = header->next) {
1296                 if (header->serial == serial) {
1297                         header->attributes |= RDATASET_ATTR_IGNORE;
1298                         make_dirty = ISC_TRUE;
1299                 }
1300                 for (dcurrent = header->down;
1301                      dcurrent != NULL;
1302                      dcurrent = dcurrent->down) {
1303                         if (dcurrent->serial == serial) {
1304                                 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1305                                 make_dirty = ISC_TRUE;
1306                         }
1307                 }
1308         }
1309         if (make_dirty)
1310                 node->dirty = 1;
1311 }
1312
1313 static inline void
1314 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1315 {
1316         rdatasetheader_t *d, *down_next;
1317
1318         for (d = top->down; d != NULL; d = down_next) {
1319                 down_next = d->down;
1320                 free_rdataset(rbtdb, mctx, d);
1321         }
1322         top->down = NULL;
1323 }
1324
1325 static inline void
1326 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1327         rdatasetheader_t *current, *top_prev, *top_next;
1328         isc_mem_t *mctx = rbtdb->common.mctx;
1329
1330         /*
1331          * Caller must be holding the node lock.
1332          */
1333
1334         top_prev = NULL;
1335         for (current = node->data; current != NULL; current = top_next) {
1336                 top_next = current->next;
1337                 clean_stale_headers(rbtdb, mctx, current);
1338                 /*
1339                  * If current is nonexistent or stale, we can clean it up.
1340                  */
1341                 if ((current->attributes &
1342                      (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1343                         if (top_prev != NULL)
1344                                 top_prev->next = current->next;
1345                         else
1346                                 node->data = current->next;
1347                         free_rdataset(rbtdb, mctx, current);
1348                 } else
1349                         top_prev = current;
1350         }
1351         node->dirty = 0;
1352 }
1353
/*%
 * Prune the rdataset headers stored at zone 'node'.
 *
 * Top-level headers are linked through ->next; each one heads a ->down
 * chain of older versions.  For every top-level header:
 *   - ->down entries that have the same serial as their parent, or that
 *     carry the IGNORE attribute, are freed;
 *   - a top-level header carrying IGNORE is freed and either dropped from
 *     the list or replaced by the first entry of its ->down chain;
 *   - the first ->down entry with a serial below 'least_serial', and
 *     everything below it, is freed;
 *   - a NONEXISTENT top-level header left without a ->down chain is freed.
 *
 * node->dirty is cleared only if no remaining header still has a ->down
 * chain.  'least_serial' must be non-zero.
 */
1354 static inline void
1355 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1356                 rbtdb_serial_t least_serial)
1357 {
1358         rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1359         rdatasetheader_t *top_prev, *top_next;
1360         isc_mem_t *mctx = rbtdb->common.mctx;
1361         isc_boolean_t still_dirty = ISC_FALSE;
1362
1363         /*
1364          * Caller must be holding the node lock.
1365          */
1366         REQUIRE(least_serial != 0);
1367
1368         top_prev = NULL;
1369         for (current = node->data; current != NULL; current = top_next) {
1370                 top_next = current->next;
1371
1372                 /*
1373                  * First, we clean up any instances of multiple rdatasets
1374                  * with the same serial number, or that have the IGNORE
1375                  * attribute.
1376                  */
1377                 dparent = current;
1378                 for (dcurrent = current->down;
1379                      dcurrent != NULL;
1380                      dcurrent = down_next) {
1381                         down_next = dcurrent->down;
1382                         INSIST(dcurrent->serial <= dparent->serial);
1383                         if (dcurrent->serial == dparent->serial ||
1384                             IGNORE(dcurrent)) {
                                     /*
                                      * Splice dcurrent out of the chain; for
                                      * ->down entries ->next is set to the
                                      * entry above them.
                                      */
1385                                 if (down_next != NULL)
1386                                         down_next->next = dparent;
1387                                 dparent->down = down_next;
1388                                 free_rdataset(rbtdb, mctx, dcurrent);
1389                         } else
1390                                 dparent = dcurrent;
1391                 }
1392
1393                 /*
1394                  * We've now eliminated all IGNORE datasets with the possible
1395                  * exception of current, which we now check.
1396                  */
1397                 if (IGNORE(current)) {
1398                         down_next = current->down;
1399                         if (down_next == NULL) {
1400                                 if (top_prev != NULL)
1401                                         top_prev->next = current->next;
1402                                 else
1403                                         node->data = current->next;
1404                                 free_rdataset(rbtdb, mctx, current);
1405                                 /*
1406                                  * current no longer exists, so we can
1407                                  * just continue with the loop.
1408                                  */
1409                                 continue;
1410                         } else {
1411                                 /*
1412                                  * Pull up current->down, making it the new
1413                                  * current.
1414                                  */
1415                                 if (top_prev != NULL)
1416                                         top_prev->next = down_next;
1417                                 else
1418                                         node->data = down_next;
1419                                 down_next->next = top_next;
1420                                 free_rdataset(rbtdb, mctx, current);
1421                                 current = down_next;
1422                         }
1423                 }
1424
1425                 /*
1426                  * We now try to find the first down node less than the
1427                  * least serial.
1428                  */
1429                 dparent = current;
1430                 for (dcurrent = current->down;
1431                      dcurrent != NULL;
1432                      dcurrent = down_next) {
1433                         down_next = dcurrent->down;
1434                         if (dcurrent->serial < least_serial)
1435                                 break;
1436                         dparent = dcurrent;
1437                 }
1438
1439                 /*
1440                  * If there is a such an rdataset, delete it and any older
1441                  * versions.
1442                  */
1443                 if (dcurrent != NULL) {
1444                         do {
1445                                 down_next = dcurrent->down;
1446                                 INSIST(dcurrent->serial <= least_serial);
1447                                 free_rdataset(rbtdb, mctx, dcurrent);
1448                                 dcurrent = down_next;
1449                         } while (dcurrent != NULL);
1450                         dparent->down = NULL;
1451                 }
1452
1453                 /*
1454                  * Note.  The serial number of 'current' might be less than
1455                  * least_serial too, but we cannot delete it because it is
1456                  * the most recent version, unless it is a NONEXISTENT
1457                  * rdataset.
1458                  */
1459                 if (current->down != NULL) {
1460                         still_dirty = ISC_TRUE;
1461                         top_prev = current;
1462                 } else {
1463                         /*
1464                          * If this is a NONEXISTENT rdataset, we can delete it.
1465                          */
1466                         if (NONEXISTENT(current)) {
1467                                 if (top_prev != NULL)
1468                                         top_prev->next = current->next;
1469                                 else
1470                                         node->data = current->next;
1471                                 free_rdataset(rbtdb, mctx, current);
1472                         } else
1473                                 top_prev = current;
1474                 }
1475         }
1476         if (!still_dirty)
1477                 node->dirty = 0;
1478 }
1479
1480 /*%
1481  * Clean up dead nodes.  These are nodes which have no references, and
1482  * have no data.  They are dead but we could not or chose not to delete
1483  * them when we deleted all the data at that node because we did not want
1484  * to wait for the tree write lock.
1485  *
1486  * The caller must hold a tree write lock and bucketnum'th node (write) lock.
1487  */
1488 static void
1489 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1490         dns_rbtnode_t *node;
1491         isc_result_t result;
1492         int count = 10;         /* XXXJT: should be adjustable */
1493
1494         node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1495         while (node != NULL && count > 0) {
1496                 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1497
1498                 /*
1499                  * Since we're holding a tree write lock, it should be
1500                  * impossible for this node to be referenced by others.
1501                  */
1502                 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1503                        node->data == NULL);
1504
1505                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1506                 if (node->nsec3)
1507                         result = dns_rbt_deletenode(rbtdb->nsec3, node,
1508                                                     ISC_FALSE);
1509                 else
1510                         result = dns_rbt_deletenode(rbtdb->tree, node,
1511                                                     ISC_FALSE);
1512                 if (result != ISC_R_SUCCESS)
1513                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1514                                       DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1515                                       "cleanup_dead_nodes: "
1516                                       "dns_rbt_deletenode: %s",
1517                                       isc_result_totext(result));
1518                 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1519                 count--;
1520         }
1521 }
1522
1523 /*
1524  * Caller must be holding the node lock if its reference must be protected
1525  * by the lock.
1526  */
1527 static inline void
1528 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1529         unsigned int lockrefs, noderefs;
1530         isc_refcount_t *lockref;
1531
1532         dns_rbtnode_refincrement0(node, &noderefs);
1533         if (noderefs == 1) {    /* this is the first reference to the node */
1534                 lockref = &rbtdb->node_locks[node->locknum].references;
1535                 isc_refcount_increment0(lockref, &lockrefs);
1536                 INSIST(lockrefs != 0);
1537         }
1538         INSIST(noderefs != 0);
1539 }
1540
1541 /*
1542  * This function is assumed to be called when a node is newly referenced
1543  * and can be in the deadnode list.  In that case the node must be retrieved
1544  * from the list because it is going to be used.  In addition, if the caller
1545  * happens to hold a write lock on the tree, it's a good chance to purge dead
1546  * nodes.
1547  * Note: while a new reference is gained in multiple places, there are only very
1548  * few cases where the node can be in the deadnode list (only empty nodes can
1549  * have been added to the list).
1550  */
/*
 * 'treelocktype' is the kind of tree lock the caller holds; dead nodes in
 * the node's bucket are purged (via cleanup_dead_nodes()) only when it is
 * isc_rwlocktype_write.
 *
 * Locking: everything happens under the bucket's "strong" lock.  The
 * deadnode list is first examined under the weak read lock; the weak
 * write lock is taken only if the node has to be unlinked or a purge is
 * possible.
 */
1551 static inline void
1552 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1553                 isc_rwlocktype_t treelocktype)
1554 {
1555         isc_boolean_t need_relock = ISC_FALSE;
1556
1557         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1558         new_reference(rbtdb, node);
1559
1560         NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1561                       isc_rwlocktype_read);
1562         if (ISC_LINK_LINKED(node, deadlink))
1563                 need_relock = ISC_TRUE;
1564         else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1565                  treelocktype == isc_rwlocktype_write)
1566                 need_relock = ISC_TRUE;
1567         NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1568                         isc_rwlocktype_read);
1569         if (need_relock) {
1570                 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1571                               isc_rwlocktype_write);
                     /*
                      * Test the link again now that the write lock is held
                      * (the read lock was released in between).
                      */
1572                 if (ISC_LINK_LINKED(node, deadlink))
1573                         ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1574                                         node, deadlink);
1575                 if (treelocktype == isc_rwlocktype_write)
1576                         cleanup_dead_nodes(rbtdb, node->locknum);
1577                 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1578                                 isc_rwlocktype_write);
1579         }
1580
1581         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1582 }
1583
1584 /*
      * Drop one reference to 'node'.  If that leaves the node unreferenced and
      * without data or a subtree, delete it from the tree, hand it to the
      * pruning task, or queue it on the bucket's dead-node list.
      *
1585  * Caller must be holding the node lock; either the "strong", read or write
1586  * lock.  Note that the lock must be held even when node references are
1587  * atomically modified; in that case the decrement operation itself does not
1588  * have to be protected, but we must avoid a race condition where multiple
1589  * threads are decreasing the reference to zero simultaneously and at least
1590  * one of them is going to free the node.
1591  * This function returns ISC_TRUE if and only if the node reference decreases
1592  * to zero.
      *
      * Arguments, as used by the code below:
      *   least_serial  serial passed to clean_zone_node() for a dirty zone
      *                 node; when 0 it is read from rbtdb->least_serial under
      *                 the database lock.
      *   nlock         type of node lock the caller holds.  If it is the read
      *                 lock, it is released and the write lock acquired
      *                 (two separate steps) before the slow path, and
      *                 downgraded again before returning.
      *   tlock         type of tree lock the caller holds.  Unless it is the
      *                 write lock, only a try-upgrade/try-lock is attempted;
      *                 the caller's original tree lock state is restored
      *                 before returning.
      *   pruning       ISC_TRUE when called from prune_tree(); suppresses
      *                 dispatching another pruning event.
      *
      * Note on the return value: when a pruning event is dispatched a new
      * reference is taken for the event, and when the reference count is found
      * to be non-zero at the final check, ISC_FALSE is returned even though
      * this call's own decrement reached zero.
1593  */
1594 static isc_boolean_t
1595 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1596                     rbtdb_serial_t least_serial,
1597                     isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1598                     isc_boolean_t pruning)
1599 {
1600         isc_result_t result;
1601         isc_boolean_t write_locked;
1602         rbtdb_nodelock_t *nodelock;
1603         unsigned int refs, nrefs;
1604         int bucket = node->locknum;
1605         isc_boolean_t no_reference;
1606
1607         nodelock = &rbtdb->node_locks[bucket];
1608
1609         /* Handle easy and typical case first. */
             /*
              * A clean node that still has data or a subtree is never deleted
              * here, so only the counters need updating.
              */
1610         if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1611                 dns_rbtnode_refdecrement(node, &nrefs);
1612                 INSIST((int)nrefs >= 0);
1613                 if (nrefs == 0) {
                             /* Last reference gone: update the bucket count. */
1614                         isc_refcount_decrement(&nodelock->references, &refs);
1615                         INSIST((int)refs >= 0);
1616                 }
1617                 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1618         }
1619
1620         /* Upgrade the lock? */
             /* Done as unlock-then-lock; the node lock is not held in between. */
1621         if (nlock == isc_rwlocktype_read) {
1622                 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1623                 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1624         }
1625         dns_rbtnode_refdecrement(node, &nrefs);
1626         INSIST((int)nrefs >= 0);
1627         if (nrefs > 0) {
1628                 /* Restore the lock? */
1629                 if (nlock == isc_rwlocktype_read)
1630                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1631                 return (ISC_FALSE);
1632         }
1633
             /*
              * The decrement reached zero.  Clean a dirty node now, provided
              * nobody has gained a new reference in the meantime.
              */
1634         if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1635                 if (IS_CACHE(rbtdb))
1636                         clean_cache_node(rbtdb, node);
1637                 else {
1638                         if (least_serial == 0) {
1639                                 /*
1640                                  * Caller doesn't know the least serial.
1641                                  * Get it.
1642                                  */
1643                                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1644                                 least_serial = rbtdb->least_serial;
1645                                 RBTDB_UNLOCK(&rbtdb->lock,
1646                                              isc_rwlocktype_read);
1647                         }
1648                         clean_zone_node(rbtdb, node, least_serial);
1649                 }
1650         }
1651
             /* The bucket has one referenced node fewer. */
1652         isc_refcount_decrement(&nodelock->references, &refs);
1653         INSIST((int)refs >= 0);
1654
1655         /*
1656          * XXXDCL should this only be done for cache zones?
1657          */
             /* The node still has data or a subtree: keep it in the tree. */
1658         if (node->data != NULL || node->down != NULL) {
1659                 /* Restore the lock? */
1660                 if (nlock == isc_rwlocktype_read)
1661                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1662                 return (ISC_TRUE);
1663         }
1664
1665         /*
1666          * Attempt to switch to a write lock on the tree.  If this fails,
1667          * we will add this node to a linked list of nodes in this locking
1668          * bucket which we will free later.
1669          */
1670         if (tlock != isc_rwlocktype_write) {
1671                 /*
1672                  * Locking hierarchy notwithstanding, we don't need to free
1673                  * the node lock before acquiring the tree write lock because
1674                  * we only do a trylock.
1675                  */
1676                 if (tlock == isc_rwlocktype_read)
1677                         result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1678                 else
1679                         result = isc_rwlock_trylock(&rbtdb->tree_lock,
1680                                                     isc_rwlocktype_write);
1681                 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1682                               result == ISC_R_LOCKBUSY);
1683
1684                 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1685         } else
1686                 write_locked = ISC_TRUE;
1687
             /* Assume the node ends up unreferenced; cleared below otherwise. */
1688         no_reference = ISC_TRUE;
1689         if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1690                 /*
1691                  * We can now delete the node if the reference counter is
1692                  * zero.  This should be typically the case, but a different
1693                  * thread may still gain a (new) reference just before the
1694                  * current thread locks the tree (e.g., in findnode()).
1695                  */
1696
1697                 /*
1698                  * If this node is the only one in the level it's in, deleting
1699                  * this node may recursively make its parent the only node in
1700                  * the parent level; if so, and if no one is currently using
1701                  * the parent node, this is almost the only opportunity to
1702                  * clean it up.  But the recursive cleanup is not that trivial
1703                  * since the child and parent may be in different lock buckets,
1704                  * which would cause a lock order reversal problem.  To avoid
1705                  * the trouble, we'll dispatch a separate event for batch
1706                  * cleaning.  We need to check whether we're deleting the node
1707                  * as a result of pruning to avoid infinite dispatching.
1708                  * Note: pruning happens only when a task has been set for the
1709                  * rbtdb.  If the user of the rbtdb chooses not to set a task,
1710                  * it's their responsibility to purge stale leaves (e.g. by
1711                  * periodic walk-through).
1712                  */
1713                 if (!pruning && node->parent != NULL &&
1714                     node->parent->down == node && node->left == NULL &&
1715                     node->right == NULL && rbtdb->task != NULL) {
1716                         isc_event_t *ev;
1717                         dns_db_t *db;
1718
1719                         ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1720                                                 DNS_EVENT_RBTPRUNE,
1721                                                 prune_tree, node,
1722                                                 sizeof(isc_event_t));
1723                         if (ev != NULL) {
                                     /*
                                      * The event carries its own node reference
                                      * and an attached database pointer;
                                      * prune_tree() releases both.
                                      */
1724                                 new_reference(rbtdb, node);
1725                                 db = NULL;
1726                                 attach((dns_db_t *)rbtdb, &db);
1727                                 ev->ev_sender = db;
1728                                 isc_task_send(rbtdb->task, &ev);
1729                                 no_reference = ISC_FALSE;
1730                         } else {
1731                                 /*
1732                                  * XXX: this is a weird situation.  We could
1733                                  * ignore this error case, but then the stale
1734                                  * node will unlikely be purged except via a
1735                                  * rare condition such as manual cleanup.  So
1736                                  * we queue it in the deadnodes list, hoping
1737                                  * the memory shortage is temporary and the node
1738                                  * will be deleted later.
1739                                  */
1740                                 isc_log_write(dns_lctx,
1741                                               DNS_LOGCATEGORY_DATABASE,
1742                                               DNS_LOGMODULE_CACHE,
1743                                               ISC_LOG_INFO,
1744                                               "decrement_reference: failed to "
1745                                               "allocate pruning event");
1746                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1747                                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1748                                                 deadlink);
1749                         }
1750                 } else {
                             /* Delete the node from its tree right away. */
1751                         if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1752                                 char printname[DNS_NAME_FORMATSIZE];
1753
1754                                 isc_log_write(dns_lctx,
1755                                               DNS_LOGCATEGORY_DATABASE,
1756                                               DNS_LOGMODULE_CACHE,
1757                                               ISC_LOG_DEBUG(1),
1758                                               "decrement_reference: "
1759                                               "delete from rbt: %p %s",
1760                                               node,
1761                                               dns_rbt_formatnodename(node,
1762                                                         printname,
1763                                                         sizeof(printname)));
1764                         }
1765
1766                         INSIST(!ISC_LINK_LINKED(node, deadlink));
                             /* The node lives in either the NSEC3 tree or the main tree. */
1767                         if (node->nsec3)
1768                                 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1769                                                             ISC_FALSE);
1770                         else
1771                                 result = dns_rbt_deletenode(rbtdb->tree, node,
1772                                                             ISC_FALSE);
                             /* A failed deletion is only logged, not propagated. */
1773                         if (result != ISC_R_SUCCESS) {
1774                                 isc_log_write(dns_lctx,
1775                                               DNS_LOGCATEGORY_DATABASE,
1776                                               DNS_LOGMODULE_CACHE,
1777                                               ISC_LOG_WARNING,
1778                                               "decrement_reference: "
1779                                               "dns_rbt_deletenode: %s",
1780                                               isc_result_totext(result));
1781                         }
1782                 }
1783         } else if (dns_rbtnode_refcurrent(node) == 0) {
                     /*
                      * No tree write lock: queue the node on the bucket's
                      * dead-node list so it can be deleted later.
                      */
1784                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1785                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1786         } else
                     /* The reference count is no longer zero. */
1787                 no_reference = ISC_FALSE;
1788
1789         /* Restore the lock? */
1790         if (nlock == isc_rwlocktype_read)
1791                 NODE_WEAKDOWNGRADE(&nodelock->lock);
1792
1793         /*
1794          * Relock a read lock, or unlock the write lock if no lock was held.
1795          */
1796         if (tlock == isc_rwlocktype_none)
1797                 if (write_locked)
1798                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1799
1800         if (tlock == isc_rwlocktype_read)
1801                 if (write_locked)
1802                         isc_rwlock_downgrade(&rbtdb->tree_lock);
1803
1804         return (no_reference);
1805 }
1806
1807 /*
1808  * Prune the tree by recursively cleaning-up single leaves.  In the worst
1809  * case, the number of iteration is the number of tree levels, which is at
1810  * most the maximum number of domain name labels, i.e, 127.  In practice, this
1811  * should be much smaller (only a few times), and even the worst case would be
1812  * acceptable for a single event.
1813  */
1814 static void
1815 prune_tree(isc_task_t *task, isc_event_t *event) {
1816         dns_rbtdb_t *rbtdb = event->ev_sender;
1817         dns_rbtnode_t *node = event->ev_arg;
1818         dns_rbtnode_t *parent;
1819         unsigned int locknum;
1820
1821         UNUSED(task);
1822
1823         isc_event_free(&event);
1824
1825         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1826         locknum = node->locknum;
1827         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1828         do {
1829                 parent = node->parent;
1830                 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1831                                     isc_rwlocktype_write, ISC_TRUE);
1832
1833                 if (parent != NULL && parent->down == NULL) {
1834                         /*
1835                          * node was the only down child of the parent and has
1836                          * just been removed.  We'll then need to examine the
1837                          * parent.  Keep the lock if possible; otherwise,
1838                          * release the old lock and acquire one for the parent.
1839                          */
1840                         if (parent->locknum != locknum) {
1841                                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1842                                             isc_rwlocktype_write);
1843                                 locknum = parent->locknum;
1844                                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1845                                           isc_rwlocktype_write);
1846                         }
1847
1848                         /*
1849                          * We need to gain a reference to the node before
1850                          * decrementing it in the next iteration.  In addition,
1851                          * if the node is in the dead-nodes list, extract it
1852                          * from the list beforehand as we do in
1853                          * reactivate_node().
1854                          */
1855                         new_reference(rbtdb, parent);
1856                         if (ISC_LINK_LINKED(parent, deadlink)) {
1857                                 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1858                                                 parent, deadlink);
1859                         }
1860                 } else
1861                         parent = NULL;
1862
1863                 node = parent;
1864         } while (node != NULL);
1865         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1866         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1867
1868         detach((dns_db_t **)&rbtdb);
1869 }
1870
1871 static inline void
1872 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1873                    rbtdb_changedlist_t *cleanup_list)
1874 {
1875         /*
1876          * Caller must be holding the database lock.
1877          */
1878
1879         rbtdb->least_serial = version->serial;
1880         *cleanup_list = version->changed_list;
1881         ISC_LIST_INIT(version->changed_list);
1882 }
1883
1884 static inline void
1885 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1886         rbtdb_changed_t *changed, *next_changed;
1887
1888         /*
1889          * If the changed record is dirty, then
1890          * an update created multiple versions of
1891          * a given rdataset.  We keep this list
1892          * until we're the least open version, at
1893          * which point it's safe to get rid of any
1894          * older versions.
1895          *
1896          * If the changed record isn't dirty, then
1897          * we don't need it anymore since we're
1898          * committing and not rolling back.
1899          *
1900          * The caller must be holding the database lock.
1901          */
1902         for (changed = HEAD(version->changed_list);
1903              changed != NULL;
1904              changed = next_changed) {
1905                 next_changed = NEXT(changed, link);
1906                 if (!changed->dirty) {
1907                         UNLINK(version->changed_list,
1908                                changed, link);
1909                         APPEND(*cleanup_list,
1910                                changed, link);
1911                 }
1912         }
1913 }
1914
1915 static void
1916 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1917         dns_rdataset_t keyset;
1918         dns_rdataset_t nsecset, signsecset;
1919         dns_rdata_t rdata = DNS_RDATA_INIT;
1920         isc_boolean_t haszonekey = ISC_FALSE;
1921         isc_boolean_t hasnsec = ISC_FALSE;
1922         isc_boolean_t hasoptbit = ISC_FALSE;
1923         isc_boolean_t nsec3createflag = ISC_FALSE;
1924         isc_result_t result;
1925
1926         dns_rdataset_init(&keyset);
1927         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1928                                      0, 0, &keyset, NULL);
1929         if (result == ISC_R_SUCCESS) {
1930                 dns_rdata_t keyrdata = DNS_RDATA_INIT;
1931                 result = dns_rdataset_first(&keyset);
1932                 while (result == ISC_R_SUCCESS) {
1933                         dns_rdataset_current(&keyset, &keyrdata);
1934                         if (dns_zonekey_iszonekey(&keyrdata)) {
1935                                 haszonekey = ISC_TRUE;
1936                                 break;
1937                         }
1938                         result = dns_rdataset_next(&keyset);
1939                 }
1940                 dns_rdataset_disassociate(&keyset);
1941         }
1942         if (!haszonekey) {
1943                 version->secure = dns_db_insecure;
1944                 version->havensec3 = ISC_FALSE;
1945                 return;
1946         }
1947
1948         dns_rdataset_init(&nsecset);
1949         dns_rdataset_init(&signsecset);
1950         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
1951                                      0, 0, &nsecset, &signsecset);
1952         if (result == ISC_R_SUCCESS) {
1953                 if (dns_rdataset_isassociated(&signsecset)) {
1954                         hasnsec = ISC_TRUE;
1955                         result = dns_rdataset_first(&nsecset);
1956                         if (result == ISC_R_SUCCESS) {
1957                                 dns_rdataset_current(&nsecset, &rdata);
1958                                 hasoptbit = dns_nsec_typepresent(&rdata,
1959                                                              dns_rdatatype_opt);
1960                         }
1961                         dns_rdataset_disassociate(&signsecset);
1962                 }
1963                 dns_rdataset_disassociate(&nsecset);
1964         }
1965
1966         setnsec3parameters(db, version, &nsec3createflag);
1967
1968         /*
1969          * Do we have a valid NSEC/NSEC3 chain?
1970          */
1971         if (version->havensec3 || (hasnsec && !hasoptbit))
1972                 version->secure = dns_db_secure;
1973         /*
1974          * Do we have a NSEC/NSEC3 chain under creation?
1975          */
1976         else if (hasoptbit || nsec3createflag)
1977                 version->secure = dns_db_partial;
1978         else
1979                 version->secure = dns_db_insecure;
1980 }
1981
1982 /*%<
1983  * Walk the origin node looking for NSEC3PARAM records.
1984  * Cache the nsec3 parameters.
1985  */
1986 static void
1987 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
1988                    isc_boolean_t *nsec3createflag)
1989 {
1990         dns_rbtnode_t *node;
1991         dns_rdata_nsec3param_t nsec3param;
1992         dns_rdata_t rdata = DNS_RDATA_INIT;
1993         isc_region_t region;
1994         isc_result_t result;
1995         rdatasetheader_t *header, *header_next;
1996         unsigned char *raw;             /* RDATASLAB */
1997         unsigned int count, length;
1998         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1999
2000         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2001         version->havensec3 = ISC_FALSE;
2002         node = rbtdb->origin_node;
2003         NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2004                   isc_rwlocktype_read);
2005         for (header = node->data;
2006              header != NULL;
2007              header = header_next) {
2008                 header_next = header->next;
2009                 do {
2010                         if (header->serial <= version->serial &&
2011                             !IGNORE(header)) {
2012                                 if (NONEXISTENT(header))
2013                                         header = NULL;
2014                                 break;
2015                         } else
2016                                 header = header->down;
2017                 } while (header != NULL);
2018
2019                 if (header != NULL &&
2020                     header->type == dns_rdatatype_nsec3param) {
2021                         /*
2022                          * Find A NSEC3PARAM with a supported algorithm.
2023                          */
2024                         raw = (unsigned char *)header + sizeof(*header);
2025                         count = raw[0] * 256 + raw[1]; /* count */
2026 #if DNS_RDATASET_FIXED
2027                         raw += count * 4 + 2;
2028 #else
2029                         raw += 2;
2030 #endif
2031                         while (count-- > 0U) {
2032                                 length = raw[0] * 256 + raw[1];
2033 #if DNS_RDATASET_FIXED
2034                                 raw += 4;
2035 #else
2036                                 raw += 2;
2037 #endif
2038                                 region.base = raw;
2039                                 region.length = length;
2040                                 raw += length;
2041                                 dns_rdata_fromregion(&rdata,
2042                                                      rbtdb->common.rdclass,
2043                                                      dns_rdatatype_nsec3param,
2044                                                      &region);
2045                                 result = dns_rdata_tostruct(&rdata,
2046                                                             &nsec3param,
2047                                                             NULL);
2048                                 INSIST(result == ISC_R_SUCCESS);
2049                                 dns_rdata_reset(&rdata);
2050
2051                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2052                                     !dns_nsec3_supportedhash(nsec3param.hash))
2053                                         continue;
2054
2055 #ifdef RFC5155_STRICT
2056                                 if (nsec3param.flags != 0)
2057                                         continue;
2058 #else
2059                                 if ((nsec3param.flags & DNS_NSEC3FLAG_CREATE)
2060                                     != 0)
2061                                         *nsec3createflag = ISC_TRUE;
2062                                 if ((nsec3param.flags & ~DNS_NSEC3FLAG_OPTOUT)
2063                                     != 0)
2064                                         continue;
2065 #endif
2066
2067                                 memcpy(version->salt, nsec3param.salt,
2068                                        nsec3param.salt_length);
2069                                 version->hash = nsec3param.hash;
2070                                 version->salt_length = nsec3param.salt_length;
2071                                 version->iterations = nsec3param.iterations;
2072                                 version->flags = nsec3param.flags;
2073                                 version->havensec3 = ISC_TRUE;
2074                                 /*
2075                                  * Look for a better algorithm than the
2076                                  * unknown test algorithm.
2077                                  */
2078                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2079                                         goto unlock;
2080                         }
2081                 }
2082         }
2083  unlock:
2084         NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2085                     isc_rwlocktype_read);
2086         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2087 }
2088
2089 static void
2090 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2091         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2092         rbtdb_version_t *version, *cleanup_version, *least_greater;
2093         isc_boolean_t rollback = ISC_FALSE;
2094         rbtdb_changedlist_t cleanup_list;
2095         rdatasetheaderlist_t resigned_list;
2096         rbtdb_changed_t *changed, *next_changed;
2097         rbtdb_serial_t serial, least_serial;
2098         dns_rbtnode_t *rbtnode;
2099         unsigned int refs;
2100         rdatasetheader_t *header;
2101         isc_boolean_t writer;
2102
2103         REQUIRE(VALID_RBTDB(rbtdb));
2104         version = (rbtdb_version_t *)*versionp;
2105
2106         cleanup_version = NULL;
2107         ISC_LIST_INIT(cleanup_list);
2108         ISC_LIST_INIT(resigned_list);
2109
2110         isc_refcount_decrement(&version->references, &refs);
2111         if (refs > 0) {         /* typical and easy case first */
2112                 if (commit) {
2113                         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2114                         INSIST(!version->writer);
2115                         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2116                 }
2117                 goto end;
2118         }
2119
2120         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2121         serial = version->serial;
2122         writer = version->writer;
2123         if (version->writer) {
2124                 if (commit) {
2125                         unsigned cur_ref;
2126                         rbtdb_version_t *cur_version;
2127
2128                         INSIST(version->commit_ok);
2129                         INSIST(version == rbtdb->future_version);
2130                         /*
2131                          * The current version is going to be replaced.
2132                          * Release the (likely last) reference to it from the
2133                          * DB itself and unlink it from the open list.
2134                          */
2135                         cur_version = rbtdb->current_version;
2136                         isc_refcount_decrement(&cur_version->references,
2137                                                &cur_ref);
2138                         if (cur_ref == 0) {
2139                                 if (cur_version->serial == rbtdb->least_serial)
2140                                         INSIST(EMPTY(cur_version->changed_list));
2141                                 UNLINK(rbtdb->open_versions,
2142                                        cur_version, link);
2143                         }
2144                         if (EMPTY(rbtdb->open_versions)) {
2145                                 /*
2146                                  * We're going to become the least open
2147                                  * version.
2148                                  */
2149                                 make_least_version(rbtdb, version,
2150                                                    &cleanup_list);
2151                         } else {
2152                                 /*
2153                                  * Some other open version is the
2154                                  * least version.  We can't cleanup
2155                                  * records that were changed in this
2156                                  * version because the older versions
2157                                  * may still be in use by an open
2158                                  * version.
2159                                  *
2160                                  * We can, however, discard the
2161                                  * changed records for things that
2162                                  * we've added that didn't exist in
2163                                  * prior versions.
2164                                  */
2165                                 cleanup_nondirty(version, &cleanup_list);
2166                         }
2167                         /*
2168                          * If the (soon to be former) current version
2169                          * isn't being used by anyone, we can clean
2170                          * it up.
2171                          */
2172                         if (cur_ref == 0) {
2173                                 cleanup_version = cur_version;
2174                                 APPENDLIST(version->changed_list,
2175                                            cleanup_version->changed_list,
2176                                            link);
2177                         }
2178                         /*
2179                          * Become the current version.
2180                          */
2181                         version->writer = ISC_FALSE;
2182                         rbtdb->current_version = version;
2183                         rbtdb->current_serial = version->serial;
2184                         rbtdb->future_version = NULL;
2185
2186                         /*
2187                          * Keep the current version in the open list, and
2188                          * gain a reference for the DB itself (see the DB
2189                          * creation function below).  This must be the only
2190                          * case where we need to increment the counter from
2191                          * zero and need to use isc_refcount_increment0().
2192                          */
2193                         isc_refcount_increment0(&version->references,
2194                                                 &cur_ref);
2195                         INSIST(cur_ref == 1);
2196                         PREPEND(rbtdb->open_versions,
2197                                 rbtdb->current_version, link);
2198                         resigned_list = version->resigned_list;
2199                         ISC_LIST_INIT(version->resigned_list);
2200                 } else {
2201                         /*
2202                          * We're rolling back this transaction.
2203                          */
2204                         cleanup_list = version->changed_list;
2205                         ISC_LIST_INIT(version->changed_list);
2206                         resigned_list = version->resigned_list;
2207                         ISC_LIST_INIT(version->resigned_list);
2208                         rollback = ISC_TRUE;
2209                         cleanup_version = version;
2210                         rbtdb->future_version = NULL;
2211                 }
2212         } else {
2213                 if (version != rbtdb->current_version) {
2214                         /*
2215                          * There are no external or internal references
2216                          * to this version and it can be cleaned up.
2217                          */
2218                         cleanup_version = version;
2219
2220                         /*
2221                          * Find the version with the least serial
2222                          * number greater than ours.
2223                          */
2224                         least_greater = PREV(version, link);
2225                         if (least_greater == NULL)
2226                                 least_greater = rbtdb->current_version;
2227
2228                         INSIST(version->serial < least_greater->serial);
2229                         /*
2230                          * Is this the least open version?
2231                          */
2232                         if (version->serial == rbtdb->least_serial) {
2233                                 /*
2234                                  * Yes.  Install the new least open
2235                                  * version.
2236                                  */
2237                                 make_least_version(rbtdb,
2238                                                    least_greater,
2239                                                    &cleanup_list);
2240                         } else {
2241                                 /*
2242                                  * Add any unexecuted cleanups to
2243                                  * those of the least greater version.
2244                                  */
2245                                 APPENDLIST(least_greater->changed_list,
2246                                            version->changed_list,
2247                                            link);
2248                         }
2249                 } else if (version->serial == rbtdb->least_serial)
2250                         INSIST(EMPTY(version->changed_list));
2251                 UNLINK(rbtdb->open_versions, version, link);
2252         }
2253         least_serial = rbtdb->least_serial;
2254         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2255
2256         /*
2257          * Update the zone's secure status.
2258          */
2259         if (writer && commit && !IS_CACHE(rbtdb))
2260                 iszonesecure(db, version, rbtdb->origin_node);
2261
2262         if (cleanup_version != NULL) {
2263                 INSIST(EMPTY(cleanup_version->changed_list));
2264                 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2265                             sizeof(*cleanup_version));
2266         }
2267
2268         /*
2269          * Commit/rollback re-signed headers.
2270          */
2271         for (header = HEAD(resigned_list);
2272              header != NULL;
2273              header = HEAD(resigned_list)) {
2274                 nodelock_t *lock;
2275
2276                 ISC_LIST_UNLINK(resigned_list, header, link);
2277
2278                 lock = &rbtdb->node_locks[header->node->locknum].lock;
2279                 NODE_LOCK(lock, isc_rwlocktype_write);
2280                 if (rollback)
2281                         resign_insert(rbtdb, header->node->locknum, header);
2282                 decrement_reference(rbtdb, header->node, least_serial,
2283                                     isc_rwlocktype_write, isc_rwlocktype_none,
2284                                     ISC_FALSE);
2285                 NODE_UNLOCK(lock, isc_rwlocktype_write);
2286         }
2287
2288         if (!EMPTY(cleanup_list)) {
2289                 /*
2290                  * We acquire a tree write lock here in order to make sure
2291                  * that stale nodes will be removed in decrement_reference().
2292                  * If we didn't have the lock, those nodes could miss the
2293                  * chance to be removed until the server stops.  The write lock
2294                  * is expensive, but this event should be rare enough to justify
2295                  * the cost.
2296                  */
2297                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2298                 for (changed = HEAD(cleanup_list);
2299                      changed != NULL;
2300                      changed = next_changed) {
2301                         nodelock_t *lock;
2302
2303                         next_changed = NEXT(changed, link);
2304                         rbtnode = changed->node;
2305                         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2306
2307                         NODE_LOCK(lock, isc_rwlocktype_write);
2308                         /*
2309                          * This is a good opportunity to purge any dead nodes,
2310                          * so use it.
2311                          */
2312                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2313
2314                         if (rollback)
2315                                 rollback_node(rbtnode, serial);
2316                         decrement_reference(rbtdb, rbtnode, least_serial,
2317                                             isc_rwlocktype_write,
2318                                             isc_rwlocktype_write, ISC_FALSE);
2319
2320                         NODE_UNLOCK(lock, isc_rwlocktype_write);
2321
2322                         isc_mem_put(rbtdb->common.mctx, changed,
2323                                     sizeof(*changed));
2324                 }
2325                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2326         }
2327
2328  end:
2329         *versionp = NULL;
2330 }
2331
2332 /*
2333  * Add the necessary magic for the wildcard name 'name'
2334  * to be found in 'rbtdb'.
2335  *
2336  * In order for wildcard matching to work correctly in
2337  * zone_find(), we must ensure that a node for the wildcarding
2338  * level exists in the database, and has its 'find_callback'
2339  * and 'wild' bits set.
2340  *
2341  * E.g. if the wildcard name is "*.sub.example." then we
2342  * must ensure that "sub.example." exists and is marked as
2343  * a wildcard level.
2344  */
2345 static isc_result_t
2346 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2347         isc_result_t result;
2348         dns_name_t foundname;
2349         dns_offsets_t offsets;
2350         unsigned int n;
2351         dns_rbtnode_t *node = NULL;
2352
2353         dns_name_init(&foundname, offsets);
2354         n = dns_name_countlabels(name);
2355         INSIST(n >= 2);
2356         n--;
2357         dns_name_getlabelsequence(name, 1, n, &foundname);
2358         result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2359         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2360                 return (result);
2361         node->nsec3 = 0;
2362         node->find_callback = 1;
2363         node->wild = 1;
2364         return (ISC_R_SUCCESS);
2365 }
2366
2367 static isc_result_t
2368 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2369         isc_result_t result;
2370         dns_name_t foundname;
2371         dns_offsets_t offsets;
2372         unsigned int n, l, i;
2373
2374         dns_name_init(&foundname, offsets);
2375         n = dns_name_countlabels(name);
2376         l = dns_name_countlabels(&rbtdb->common.origin);
2377         i = l + 1;
2378         while (i < n) {
2379                 dns_rbtnode_t *node = NULL;     /* dummy */
2380                 dns_name_getlabelsequence(name, n - i, i, &foundname);
2381                 if (dns_name_iswildcard(&foundname)) {
2382                         result = add_wildcard_magic(rbtdb, &foundname);
2383                         if (result != ISC_R_SUCCESS)
2384                                 return (result);
2385                         result = dns_rbt_addnode(rbtdb->tree, &foundname,
2386                                                  &node);
2387                         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2388                                 return (result);
2389                         node->nsec3 = 0;
2390                 }
2391                 i++;
2392         }
2393         return (ISC_R_SUCCESS);
2394 }
2395
2396 static isc_result_t
2397 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2398          dns_dbnode_t **nodep)
2399 {
2400         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2401         dns_rbtnode_t *node = NULL;
2402         dns_name_t nodename;
2403         isc_result_t result;
2404         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2405
2406         REQUIRE(VALID_RBTDB(rbtdb));
2407
2408         dns_name_init(&nodename, NULL);
2409         RWLOCK(&rbtdb->tree_lock, locktype);
2410         result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2411                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2412         if (result != ISC_R_SUCCESS) {
2413                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2414                 if (!create) {
2415                         if (result == DNS_R_PARTIALMATCH)
2416                                 result = ISC_R_NOTFOUND;
2417                         return (result);
2418                 }
2419                 /*
2420                  * It would be nice to try to upgrade the lock instead of
2421                  * unlocking then relocking.
2422                  */
2423                 locktype = isc_rwlocktype_write;
2424                 RWLOCK(&rbtdb->tree_lock, locktype);
2425                 node = NULL;
2426                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2427                 if (result == ISC_R_SUCCESS) {
2428                         dns_rbt_namefromnode(node, &nodename);
2429 #ifdef DNS_RBT_USEHASH
2430                         node->locknum = node->hashval % rbtdb->node_lock_count;
2431 #else
2432                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2433                                 rbtdb->node_lock_count;
2434 #endif
2435                         node->nsec3 = 0;
2436                         add_empty_wildcards(rbtdb, name);
2437
2438                         if (dns_name_iswildcard(name)) {
2439                                 result = add_wildcard_magic(rbtdb, name);
2440                                 if (result != ISC_R_SUCCESS) {
2441                                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2442                                         return (result);
2443                                 }
2444                         }
2445                 } else if (result != ISC_R_EXISTS) {
2446                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2447                         return (result);
2448                 }
2449         }
2450         reactivate_node(rbtdb, node, locktype);
2451         RWUNLOCK(&rbtdb->tree_lock, locktype);
2452
2453         *nodep = (dns_dbnode_t *)node;
2454
2455         return (ISC_R_SUCCESS);
2456 }
2457
2458 static isc_result_t
2459 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2460               dns_dbnode_t **nodep)
2461 {
2462         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2463         dns_rbtnode_t *node = NULL;
2464         dns_name_t nodename;
2465         isc_result_t result;
2466         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2467
2468         REQUIRE(VALID_RBTDB(rbtdb));
2469
2470         dns_name_init(&nodename, NULL);
2471         RWLOCK(&rbtdb->tree_lock, locktype);
2472         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, &node, NULL,
2473                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2474         if (result != ISC_R_SUCCESS) {
2475                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2476                 if (!create) {
2477                         if (result == DNS_R_PARTIALMATCH)
2478                                 result = ISC_R_NOTFOUND;
2479                         return (result);
2480                 }
2481                 /*
2482                  * It would be nice to try to upgrade the lock instead of
2483                  * unlocking then relocking.
2484                  */
2485                 locktype = isc_rwlocktype_write;
2486                 RWLOCK(&rbtdb->tree_lock, locktype);
2487                 node = NULL;
2488                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
2489                 if (result == ISC_R_SUCCESS) {
2490                         dns_rbt_namefromnode(node, &nodename);
2491 #ifdef DNS_RBT_USEHASH
2492                         node->locknum = node->hashval % rbtdb->node_lock_count;
2493 #else
2494                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2495                                 rbtdb->node_lock_count;
2496 #endif
2497                         node->nsec3 = 1U;
2498                 } else if (result != ISC_R_EXISTS) {
2499                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2500                         return (result);
2501                 }
2502         } else
2503                 INSIST(node->nsec3);
2504         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
2505         new_reference(rbtdb, node);
2506         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
2507         RWUNLOCK(&rbtdb->tree_lock, locktype);
2508
2509         *nodep = (dns_dbnode_t *)node;
2510
2511         return (ISC_R_SUCCESS);
2512 }
2513
2514 static isc_result_t
2515 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2516         rbtdb_search_t *search = arg;
2517         rdatasetheader_t *header, *header_next;
2518         rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2519         rdatasetheader_t *found;
2520         isc_result_t result;
2521         dns_rbtnode_t *onode;
2522
2523         /*
2524          * We only want to remember the topmost zone cut, since it's the one
2525          * that counts, so we'll just continue if we've already found a
2526          * zonecut.
2527          */
2528         if (search->zonecut != NULL)
2529                 return (DNS_R_CONTINUE);
2530
2531         found = NULL;
2532         result = DNS_R_CONTINUE;
2533         onode = search->rbtdb->origin_node;
2534
2535         NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2536                   isc_rwlocktype_read);
2537
2538         /*
2539          * Look for an NS or DNAME rdataset active in our version.
2540          */
2541         ns_header = NULL;
2542         dname_header = NULL;
2543         sigdname_header = NULL;
2544         for (header = node->data; header != NULL; header = header_next) {
2545                 header_next = header->next;
2546                 if (header->type == dns_rdatatype_ns ||
2547                     header->type == dns_rdatatype_dname ||
2548                     header->type == RBTDB_RDATATYPE_SIGDNAME) {
2549                         do {
2550                                 if (header->serial <= search->serial &&
2551                                     !IGNORE(header)) {
2552                                         /*
2553                                          * Is this a "this rdataset doesn't
2554                                          * exist" record?
2555                                          */
2556                                         if (NONEXISTENT(header))
2557                                                 header = NULL;
2558                                         break;
2559                                 } else
2560                                         header = header->down;
2561                         } while (header != NULL);
2562                         if (header != NULL) {
2563                                 if (header->type == dns_rdatatype_dname)
2564                                         dname_header = header;
2565                                 else if (header->type ==
2566                                            RBTDB_RDATATYPE_SIGDNAME)
2567                                         sigdname_header = header;
2568                                 else if (node != onode ||
2569                                          IS_STUB(search->rbtdb)) {
2570                                         /*
2571                                          * We've found an NS rdataset that
2572                                          * isn't at the origin node.  We check
2573                                          * that they're not at the origin node,
2574                                          * because otherwise we'd erroneously
2575                                          * treat the zone top as if it were
2576                                          * a delegation.
2577                                          */
2578                                         ns_header = header;
2579                                 }
2580                         }
2581                 }
2582         }
2583
2584         /*
2585          * Did we find anything?
2586          */
2587         if (dname_header != NULL) {
2588                 /*
2589                  * Note that DNAME has precedence over NS if both exist.
2590                  */
2591                 found = dname_header;
2592                 search->zonecut_sigrdataset = sigdname_header;
2593         } else if (ns_header != NULL) {
2594                 found = ns_header;
2595                 search->zonecut_sigrdataset = NULL;
2596         }
2597
2598         if (found != NULL) {
2599                 /*
2600                  * We increment the reference count on node to ensure that
2601                  * search->zonecut_rdataset will still be valid later.
2602                  */
2603                 new_reference(search->rbtdb, node);
2604                 search->zonecut = node;
2605                 search->zonecut_rdataset = found;
2606                 search->need_cleanup = ISC_TRUE;
2607                 /*
2608                  * Since we've found a zonecut, anything beneath it is
2609                  * glue and is not subject to wildcard matching, so we
2610                  * may clear search->wild.
2611                  */
2612                 search->wild = ISC_FALSE;
2613                 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2614                         /*
2615                          * If the caller does not want to find glue, then
2616                          * this is the best answer and the search should
2617                          * stop now.
2618                          */
2619                         result = DNS_R_PARTIALMATCH;
2620                 } else {
2621                         dns_name_t *zcname;
2622
2623                         /*
2624                          * The search will continue beneath the zone cut.
2625                          * This may or may not be the best match.  In case it
2626                          * is, we need to remember the node name.
2627                          */
2628                         zcname = dns_fixedname_name(&search->zonecut_name);
2629                         RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2630                                       ISC_R_SUCCESS);
2631                         search->copy_name = ISC_TRUE;
2632                 }
2633         } else {
2634                 /*
2635                  * There is no zonecut at this node which is active in this
2636                  * version.
2637                  *
2638                  * If this is a "wild" node and the caller hasn't disabled
2639                  * wildcard matching, remember that we've seen a wild node
2640                  * in case we need to go searching for wildcard matches
2641                  * later on.
2642                  */
2643                 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2644                         search->wild = ISC_TRUE;
2645         }
2646
2647         NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2648                     isc_rwlocktype_read);
2649
2650         return (result);
2651 }
2652
2653 static inline void
2654 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2655               rdatasetheader_t *header, isc_stdtime_t now,
2656               dns_rdataset_t *rdataset)
2657 {
2658         unsigned char *raw;     /* RDATASLAB */
2659
2660         /*
2661          * Caller must be holding the node reader lock.
2662          * XXXJT: technically, we need a writer lock, since we'll increment
2663          * the header count below.  However, since the actual counter value
2664          * doesn't matter, we prioritize performance here.  (We may want to
2665          * use atomic increment when available).
2666          */
2667
2668         if (rdataset == NULL)
2669                 return;
2670
2671         new_reference(rbtdb, node);
2672
2673         INSIST(rdataset->methods == NULL);      /* We must be disassociated. */
2674
2675         rdataset->methods = &rdataset_methods;
2676         rdataset->rdclass = rbtdb->common.rdclass;
2677         rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2678         rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2679         rdataset->ttl = header->rdh_ttl - now;
2680         rdataset->trust = header->trust;
2681         if (NXDOMAIN(header))
2682                 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2683         if (OPTOUT(header))
2684                 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2685         rdataset->private1 = rbtdb;
2686         rdataset->private2 = node;
2687         raw = (unsigned char *)header + sizeof(*header);
2688         rdataset->private3 = raw;
2689         rdataset->count = header->count++;
2690         if (rdataset->count == ISC_UINT32_MAX)
2691                 rdataset->count = 0;
2692
2693         /*
2694          * Reset iterator state.
2695          */
2696         rdataset->privateuint4 = 0;
2697         rdataset->private5 = NULL;
2698
2699         /*
2700          * Add noqname proof.
2701          */
2702         rdataset->private6 = header->noqname;
2703         if (rdataset->private6 != NULL)
2704                 rdataset->attributes |=  DNS_RDATASETATTR_NOQNAME;
2705         rdataset->private7 = header->closest;
2706         if (rdataset->private7 != NULL)
2707                 rdataset->attributes |=  DNS_RDATASETATTR_CLOSEST;
2708
2709         /*
2710          * Copy out re-signing information.
2711          */
2712         if (RESIGN(header)) {
2713                 rdataset->attributes |=  DNS_RDATASETATTR_RESIGN;
2714                 rdataset->resign = header->resign;
2715         } else
2716                 rdataset->resign = 0;
2717 }
2718
2719 static inline isc_result_t
2720 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2721                  dns_name_t *foundname, dns_rdataset_t *rdataset,
2722                  dns_rdataset_t *sigrdataset)
2723 {
2724         isc_result_t result;
2725         dns_name_t *zcname;
2726         rbtdb_rdatatype_t type;
2727         dns_rbtnode_t *node;
2728
2729         /*
2730          * The caller MUST NOT be holding any node locks.
2731          */
2732
2733         node = search->zonecut;
2734         type = search->zonecut_rdataset->type;
2735
2736         /*
2737          * If we have to set foundname, we do it before anything else.
2738          * If we were to set foundname after we had set nodep or bound the
2739          * rdataset, then we'd have to undo that work if dns_name_copy()
2740          * failed.  By setting foundname first, there's nothing to undo if
2741          * we have trouble.
2742          */
2743         if (foundname != NULL && search->copy_name) {
2744                 zcname = dns_fixedname_name(&search->zonecut_name);
2745                 result = dns_name_copy(zcname, foundname, NULL);
2746                 if (result != ISC_R_SUCCESS)
2747                         return (result);
2748         }
2749         if (nodep != NULL) {
2750                 /*
2751                  * Note that we don't have to increment the node's reference
2752                  * count here because we're going to use the reference we
2753                  * already have in the search block.
2754                  */
2755                 *nodep = node;
2756                 search->need_cleanup = ISC_FALSE;
2757         }
2758         if (rdataset != NULL) {
2759                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2760                           isc_rwlocktype_read);
2761                 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2762                               search->now, rdataset);
2763                 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2764                         bind_rdataset(search->rbtdb, node,
2765                                       search->zonecut_sigrdataset,
2766                                       search->now, sigrdataset);
2767                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2768                             isc_rwlocktype_read);
2769         }
2770
2771         if (type == dns_rdatatype_dname)
2772                 return (DNS_R_DNAME);
2773         return (DNS_R_DELEGATION);
2774 }
2775
2776 static inline isc_boolean_t
2777 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2778            dns_rbtnode_t *node)
2779 {
2780         unsigned char *raw;     /* RDATASLAB */
2781         unsigned int count, size;
2782         dns_name_t ns_name;
2783         isc_boolean_t valid = ISC_FALSE;
2784         dns_offsets_t offsets;
2785         isc_region_t region;
2786         rdatasetheader_t *header;
2787
2788         /*
2789          * No additional locking is required.
2790          */
2791
2792         /*
2793          * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
2794          * if it occurs at a zone cut, but is not valid below it.
2795          */
2796         if (type == dns_rdatatype_ns) {
2797                 if (node != search->zonecut) {
2798                         return (ISC_FALSE);
2799                 }
2800         } else if (type != dns_rdatatype_a &&
2801                    type != dns_rdatatype_aaaa &&
2802                    type != dns_rdatatype_a6) {
2803                 return (ISC_FALSE);
2804         }
2805
2806         header = search->zonecut_rdataset;
2807         raw = (unsigned char *)header + sizeof(*header);
2808         count = raw[0] * 256 + raw[1];
2809 #if DNS_RDATASET_FIXED
2810         raw += 2 + (4 * count);
2811 #else
2812         raw += 2;
2813 #endif
2814
2815         while (count > 0) {
2816                 count--;
2817                 size = raw[0] * 256 + raw[1];
2818 #if DNS_RDATASET_FIXED
2819                 raw += 4;
2820 #else
2821                 raw += 2;
2822 #endif
2823                 region.base = raw;
2824                 region.length = size;
2825                 raw += size;
2826                 /*
2827                  * XXX Until we have rdata structures, we have no choice but
2828                  * to directly access the rdata format.
2829                  */
2830                 dns_name_init(&ns_name, offsets);
2831                 dns_name_fromregion(&ns_name, &region);
2832                 if (dns_name_compare(&ns_name, name) == 0) {
2833                         valid = ISC_TRUE;
2834                         break;
2835                 }
2836         }
2837
2838         return (valid);
2839 }
2840
2841 static inline isc_boolean_t
2842 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2843             dns_name_t *name)
2844 {
2845         dns_fixedname_t fnext;
2846         dns_fixedname_t forigin;
2847         dns_name_t *next;
2848         dns_name_t *origin;
2849         dns_name_t prefix;
2850         dns_rbtdb_t *rbtdb;
2851         dns_rbtnode_t *node;
2852         isc_result_t result;
2853         isc_boolean_t answer = ISC_FALSE;
2854         rdatasetheader_t *header;
2855
2856         rbtdb = search->rbtdb;
2857
2858         dns_name_init(&prefix, NULL);
2859         dns_fixedname_init(&fnext);
2860         next = dns_fixedname_name(&fnext);
2861         dns_fixedname_init(&forigin);
2862         origin = dns_fixedname_name(&forigin);
2863
2864         result = dns_rbtnodechain_next(chain, NULL, NULL);
2865         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2866                 node = NULL;
2867                 result = dns_rbtnodechain_current(chain, &prefix,
2868                                                   origin, &node);
2869                 if (result != ISC_R_SUCCESS)
2870                         break;
2871                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2872                           isc_rwlocktype_read);
2873                 for (header = node->data;
2874                      header != NULL;
2875                      header = header->next) {
2876                         if (header->serial <= search->serial &&
2877                             !IGNORE(header) && EXISTS(header))
2878                                 break;
2879                 }
2880                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2881                             isc_rwlocktype_read);
2882                 if (header != NULL)
2883                         break;
2884                 result = dns_rbtnodechain_next(chain, NULL, NULL);
2885         }
2886         if (result == ISC_R_SUCCESS)
2887                 result = dns_name_concatenate(&prefix, origin, next, NULL);
2888         if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
2889                 answer = ISC_TRUE;
2890         return (answer);
2891 }
2892
2893 static inline isc_boolean_t
2894 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2895         dns_fixedname_t fnext;
2896         dns_fixedname_t forigin;
2897         dns_fixedname_t fprev;
2898         dns_name_t *next;
2899         dns_name_t *origin;
2900         dns_name_t *prev;
2901         dns_name_t name;
2902         dns_name_t rname;
2903         dns_name_t tname;
2904         dns_rbtdb_t *rbtdb;
2905         dns_rbtnode_t *node;
2906         dns_rbtnodechain_t chain;
2907         isc_boolean_t check_next = ISC_TRUE;
2908         isc_boolean_t check_prev = ISC_TRUE;
2909         isc_boolean_t answer = ISC_FALSE;
2910         isc_result_t result;
2911         rdatasetheader_t *header;
2912         unsigned int n;
2913
2914         rbtdb = search->rbtdb;
2915
2916         dns_name_init(&name, NULL);
2917         dns_name_init(&tname, NULL);
2918         dns_name_init(&rname, NULL);
2919         dns_fixedname_init(&fnext);
2920         next = dns_fixedname_name(&fnext);
2921         dns_fixedname_init(&fprev);
2922         prev = dns_fixedname_name(&fprev);
2923         dns_fixedname_init(&forigin);
2924         origin = dns_fixedname_name(&forigin);
2925
2926         /*
2927          * Find if qname is at or below an empty node.
2928          * Use our own copy of the chain.
2929          */
2930
2931         chain = search->chain;
2932         do {
2933                 node = NULL;
2934                 result = dns_rbtnodechain_current(&chain, &name,
2935                                                   origin, &node);
2936                 if (result != ISC_R_SUCCESS)
2937                         break;
2938                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2939                           isc_rwlocktype_read);
2940                 for (header = node->data;
2941                      header != NULL;
2942                      header = header->next) {
2943                         if (header->serial <= search->serial &&
2944                             !IGNORE(header) && EXISTS(header))
2945                                 break;
2946                 }
2947                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2948                             isc_rwlocktype_read);
2949                 if (header != NULL)
2950                         break;
2951                 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
2952         } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
2953         if (result == ISC_R_SUCCESS)
2954                 result = dns_name_concatenate(&name, origin, prev, NULL);
2955         if (result != ISC_R_SUCCESS)
2956                 check_prev = ISC_FALSE;
2957
2958         result = dns_rbtnodechain_next(&chain, NULL, NULL);
2959         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2960                 node = NULL;
2961                 result = dns_rbtnodechain_current(&chain, &name,
2962                                                   origin, &node);
2963                 if (result != ISC_R_SUCCESS)
2964                         break;
2965                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2966                           isc_rwlocktype_read);
2967                 for (header = node->data;
2968                      header != NULL;
2969                      header = header->next) {
2970                         if (header->serial <= search->serial &&
2971                             !IGNORE(header) && EXISTS(header))
2972                                 break;
2973                 }
2974                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2975                             isc_rwlocktype_read);
2976                 if (header != NULL)
2977                         break;
2978                 result = dns_rbtnodechain_next(&chain, NULL, NULL);
2979         }
2980         if (result == ISC_R_SUCCESS)
2981                 result = dns_name_concatenate(&name, origin, next, NULL);
2982         if (result != ISC_R_SUCCESS)
2983                 check_next = ISC_FALSE;
2984
2985         dns_name_clone(qname, &rname);
2986
2987         /*
2988          * Remove the wildcard label to find the terminal name.
2989          */
2990         n = dns_name_countlabels(wname);
2991         dns_name_getlabelsequence(wname, 1, n - 1, &tname);
2992
2993         do {
2994                 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
2995                     (check_next && dns_name_issubdomain(next, &rname))) {
2996                         answer = ISC_TRUE;
2997                         break;
2998                 }
2999                 /*
3000                  * Remove the left hand label.
3001                  */
3002                 n = dns_name_countlabels(&rname);
3003                 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3004         } while (!dns_name_equal(&rname, &tname));
3005         return (answer);
3006 }
3007
3008 static inline isc_result_t
3009 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3010               dns_name_t *qname)
3011 {
3012         unsigned int i, j;
3013         dns_rbtnode_t *node, *level_node, *wnode;
3014         rdatasetheader_t *header;
3015         isc_result_t result = ISC_R_NOTFOUND;
3016         dns_name_t name;
3017         dns_name_t *wname;
3018         dns_fixedname_t fwname;
3019         dns_rbtdb_t *rbtdb;
3020         isc_boolean_t done, wild, active;
3021         dns_rbtnodechain_t wchain;
3022
3023         /*
3024          * Caller must be holding the tree lock and MUST NOT be holding
3025          * any node locks.
3026          */
3027
3028         /*
3029          * Examine each ancestor level.  If the level's wild bit
3030          * is set, then construct the corresponding wildcard name and
3031          * search for it.  If the wildcard node exists, and is active in
3032          * this version, we're done.  If not, then we next check to see
3033          * if the ancestor is active in this version.  If so, then there
3034          * can be no possible wildcard match and again we're done.  If not,
3035          * continue the search.
3036          */
3037
3038         rbtdb = search->rbtdb;
3039         i = search->chain.level_matches;
3040         done = ISC_FALSE;
3041         node = *nodep;
3042         do {
3043                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3044                           isc_rwlocktype_read);
3045
3046                 /*
3047                  * First we try to figure out if this node is active in
3048                  * the search's version.  We do this now, even though we
3049                  * may not need the information, because it simplifies the
3050                  * locking and code flow.
3051                  */
3052                 for (header = node->data;
3053                      header != NULL;
3054                      header = header->next) {
3055                         if (header->serial <= search->serial &&
3056                             !IGNORE(header) && EXISTS(header))
3057                                 break;
3058                 }
3059                 if (header != NULL)
3060                         active = ISC_TRUE;
3061                 else
3062                         active = ISC_FALSE;
3063
3064                 if (node->wild)
3065                         wild = ISC_TRUE;
3066                 else
3067                         wild = ISC_FALSE;
3068
3069                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3070                             isc_rwlocktype_read);
3071
3072                 if (wild) {
3073                         /*
3074                          * Construct the wildcard name for this level.
3075                          */
3076                         dns_name_init(&name, NULL);
3077                         dns_rbt_namefromnode(node, &name);
3078                         dns_fixedname_init(&fwname);
3079                         wname = dns_fixedname_name(&fwname);
3080                         result = dns_name_concatenate(dns_wildcardname, &name,
3081                                                       wname, NULL);
3082                         j = i;
3083                         while (result == ISC_R_SUCCESS && j != 0) {
3084                                 j--;
3085                                 level_node = search->chain.levels[j];
3086                                 dns_name_init(&name, NULL);
3087                                 dns_rbt_namefromnode(level_node, &name);
3088                                 result = dns_name_concatenate(wname,
3089                                                               &name,
3090                                                               wname,
3091                                                               NULL);
3092                         }
3093                         if (result != ISC_R_SUCCESS)
3094                                 break;
3095
3096                         wnode = NULL;
3097                         dns_rbtnodechain_init(&wchain, NULL);
3098                         result = dns_rbt_findnode(rbtdb->tree, wname,
3099                                                   NULL, &wnode, &wchain,
3100                                                   DNS_RBTFIND_EMPTYDATA,
3101                                                   NULL, NULL);
3102                         if (result == ISC_R_SUCCESS) {
3103                                 nodelock_t *lock;
3104
3105                                 /*
3106                                  * We have found the wildcard node.  If it
3107                                  * is active in the search's version, we're
3108                                  * done.
3109                                  */
3110                                 lock = &rbtdb->node_locks[wnode->locknum].lock;
3111                                 NODE_LOCK(lock, isc_rwlocktype_read);
3112                                 for (header = wnode->data;
3113                                      header != NULL;
3114                                      header = header->next) {
3115                                         if (header->serial <= search->serial &&
3116                                             !IGNORE(header) && EXISTS(header))
3117                                                 break;
3118                                 }
3119                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3120                                 if (header != NULL ||
3121                                     activeempty(search, &wchain, wname)) {
3122                                         if (activeemtpynode(search, qname,
3123                                                             wname)) {
3124                                                 return (ISC_R_NOTFOUND);
3125                                         }
3126                                         /*
3127                                          * The wildcard node is active!
3128                                          *
3129                                          * Note: result is still ISC_R_SUCCESS
3130                                          * so we don't have to set it.
3131                                          */
3132                                         *nodep = wnode;
3133                                         break;
3134                                 }
3135                         } else if (result != ISC_R_NOTFOUND &&
3136                                    result != DNS_R_PARTIALMATCH) {
3137                                 /*
3138                                  * An error has occurred.  Bail out.
3139                                  */
3140                                 break;
3141                         }
3142                 }
3143
3144                 if (active) {
3145                         /*
3146                          * The level node is active.  Any wildcarding
3147                          * present at higher levels has no
3148                          * effect and we're done.
3149                          */
3150                         result = ISC_R_NOTFOUND;
3151                         break;
3152                 }
3153
3154                 if (i > 0) {
3155                         i--;
3156                         node = search->chain.levels[i];
3157                 } else
3158                         done = ISC_TRUE;
3159         } while (!done);
3160
3161         return (result);
3162 }
3163
3164 static isc_boolean_t
3165 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3166 {
3167         dns_rdata_t rdata = DNS_RDATA_INIT;
3168         dns_rdata_nsec3_t nsec3;
3169         unsigned char *raw;                     /* RDATASLAB */
3170         unsigned int rdlen, count;
3171         isc_region_t region;
3172         isc_result_t result;
3173
3174         REQUIRE(header->type == dns_rdatatype_nsec3);
3175
3176         raw = (unsigned char *)header + sizeof(*header);
3177         count = raw[0] * 256 + raw[1]; /* count */
3178 #if DNS_RDATASET_FIXED
3179         raw += count * 4 + 2;
3180 #else
3181         raw += 2;
3182 #endif
3183         while (count-- > 0) {
3184                 rdlen = raw[0] * 256 + raw[1];
3185 #if DNS_RDATASET_FIXED
3186                 raw += 4;
3187 #else
3188                 raw += 2;
3189 #endif
3190                 region.base = raw;
3191                 region.length = rdlen;
3192                 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3193                                      dns_rdatatype_nsec3, &region);
3194                 raw += rdlen;
3195                 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3196                 INSIST(result == ISC_R_SUCCESS);
3197                 if (nsec3.hash == search->rbtversion->hash &&
3198                     nsec3.iterations == search->rbtversion->iterations &&
3199                     nsec3.salt_length == search->rbtversion->salt_length &&
3200                     memcmp(nsec3.salt, search->rbtversion->salt,
3201                            nsec3.salt_length) == 0)
3202                         return (ISC_TRUE);
3203                 dns_rdata_reset(&rdata);
3204         }
3205         return (ISC_FALSE);
3206 }
3207
3208 static inline isc_result_t
3209 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3210                   dns_name_t *foundname, dns_rdataset_t *rdataset,
3211                   dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3212                   dns_db_secure_t secure)
3213 {
3214         dns_rbtnode_t *node;
3215         rdatasetheader_t *header, *header_next, *found, *foundsig;
3216         isc_boolean_t empty_node;
3217         isc_result_t result;
3218         dns_fixedname_t fname, forigin;
3219         dns_name_t *name, *origin;
3220         dns_rdatatype_t type;
3221         rbtdb_rdatatype_t sigtype;
3222         isc_boolean_t wraps;
3223         isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3224
3225         if (tree == search->rbtdb->nsec3) {
3226                 type = dns_rdatatype_nsec3;
3227                 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3228                 wraps = ISC_TRUE;
3229         } else {
3230                 type = dns_rdatatype_nsec;
3231                 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3232                 wraps = ISC_FALSE;
3233         }
3234
3235  again:
3236         do {
3237                 node = NULL;
3238                 dns_fixedname_init(&fname);
3239                 name = dns_fixedname_name(&fname);
3240                 dns_fixedname_init(&forigin);
3241                 origin = dns_fixedname_name(&forigin);
3242                 result = dns_rbtnodechain_current(&search->chain, name,
3243                                                   origin, &node);
3244                 if (result != ISC_R_SUCCESS)
3245                         return (result);
3246                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3247                           isc_rwlocktype_read);
3248                 found = NULL;
3249                 foundsig = NULL;
3250                 empty_node = ISC_TRUE;
3251                 for (header = node->data;
3252                      header != NULL;
3253                      header = header_next) {
3254                         header_next = header->next;
3255                         /*
3256                          * Look for an active, extant NSEC or RRSIG NSEC.
3257                          */
3258                         do {
3259                                 if (header->serial <= search->serial &&
3260                                     !IGNORE(header)) {
3261                                         /*
3262                                          * Is this a "this rdataset doesn't
3263                                          * exist" record?
3264                                          */
3265                                         if (NONEXISTENT(header))
3266                                                 header = NULL;
3267                                         break;
3268                                 } else
3269                                         header = header->down;
3270                         } while (header != NULL);
3271                         if (header != NULL) {
3272                                 /*
3273                                  * We now know that there is at least one
3274                                  * active rdataset at this node.
3275                                  */
3276                                 empty_node = ISC_FALSE;
3277                                 if (header->type == type) {
3278                                         found = header;
3279                                         if (foundsig != NULL)
3280                                                 break;
3281                                 } else if (header->type == sigtype) {
3282                                         foundsig = header;
3283                                         if (found != NULL)
3284                                                 break;
3285                                 }
3286                         }
3287                 }
3288                 if (!empty_node) {
3289                         if (found != NULL && search->rbtversion->havensec3 &&
3290                             found->type == dns_rdatatype_nsec3 &&
3291                             !matchparams(found, search)) {
3292                                 empty_node = ISC_TRUE;
3293                                 found = NULL;
3294                                 foundsig = NULL;
3295                                 result = dns_rbtnodechain_prev(&search->chain,
3296                                                                NULL, NULL);
3297                         } else if (found != NULL &&
3298                                    (foundsig != NULL || !need_sig))
3299                         {
3300                                 /*
3301                                  * We've found the right NSEC/NSEC3 record.
3302                                  *
3303                                  * Note: for this to really be the right
3304                                  * NSEC record, it's essential that the NSEC
3305                                  * records of any nodes obscured by a zone
3306                                  * cut have been removed; we assume this is
3307                                  * the case.
3308                                  */
3309                                 result = dns_name_concatenate(name, origin,
3310                                                               foundname, NULL);
3311                                 if (result == ISC_R_SUCCESS) {
3312                                         if (nodep != NULL) {
3313                                                 new_reference(search->rbtdb,
3314                                                               node);
3315                                                 *nodep = node;
3316                                         }
3317                                         bind_rdataset(search->rbtdb, node,
3318                                                       found, search->now,
3319                                                       rdataset);
3320                                         if (foundsig != NULL)
3321                                                 bind_rdataset(search->rbtdb,
3322                                                               node,
3323                                                               foundsig,
3324                                                               search->now,
3325                                                               sigrdataset);
3326                                 }
3327                         } else if (found == NULL && foundsig == NULL) {
3328                                 /*
3329                                  * This node is active, but has no NSEC or
3330                                  * RRSIG NSEC.  That means it's glue or
3331                                  * other obscured zone data that isn't
3332                                  * relevant for our search.  Treat the
3333                                  * node as if it were empty and keep looking.
3334                                  */
3335                                 empty_node = ISC_TRUE;
3336                                 result = dns_rbtnodechain_prev(&search->chain,
3337                                                                NULL, NULL);
3338                         } else {
3339                                 /*
3340                                  * We found an active node, but either the
3341                                  * NSEC or the RRSIG NSEC is missing.  This
3342                                  * shouldn't happen.
3343                                  */
3344                                 result = DNS_R_BADDB;
3345                         }
3346                 } else {
3347                         /*
3348                          * This node isn't active.  We've got to keep
3349                          * looking.
3350                          */
3351                         result = dns_rbtnodechain_prev(&search->chain, NULL,
3352                                                        NULL);
3353                 }
3354                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3355                             isc_rwlocktype_read);
3356         } while (empty_node && result == ISC_R_SUCCESS);
3357
3358         if (result == ISC_R_NOMORE && wraps) {
3359                 result = dns_rbtnodechain_last(&search->chain, tree,
3360                                                NULL, NULL);
3361                 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3362                         wraps = ISC_FALSE;
3363                         goto again;
3364                 }
3365         }
3366
3367         /*
3368          * If the result is ISC_R_NOMORE, then we got to the beginning of
3369          * the database and didn't find a NSEC record.  This shouldn't
3370          * happen.
3371          */
3372         if (result == ISC_R_NOMORE)
3373                 result = DNS_R_BADDB;
3374
3375         return (result);
3376 }
3377
3378 static isc_result_t
3379 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3380           dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3381           dns_dbnode_t **nodep, dns_name_t *foundname,
3382           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3383 {
3384         dns_rbtnode_t *node = NULL;
3385         isc_result_t result;
3386         rbtdb_search_t search;
3387         isc_boolean_t cname_ok = ISC_TRUE;
3388         isc_boolean_t close_version = ISC_FALSE;
3389         isc_boolean_t maybe_zonecut = ISC_FALSE;
3390         isc_boolean_t at_zonecut = ISC_FALSE;
3391         isc_boolean_t wild;
3392         isc_boolean_t empty_node;
3393         rdatasetheader_t *header, *header_next, *found, *nsecheader;
3394         rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3395         rbtdb_rdatatype_t sigtype;
3396         isc_boolean_t active;
3397         dns_rbtnodechain_t chain;
3398         nodelock_t *lock;
3399         dns_rbt_t *tree;
3400
3401         search.rbtdb = (dns_rbtdb_t *)db;
3402
3403         REQUIRE(VALID_RBTDB(search.rbtdb));
3404
3405         /*
3406          * We don't care about 'now'.
3407          */
3408         UNUSED(now);
3409
3410         /*
3411          * If the caller didn't supply a version, attach to the current
3412          * version.
3413          */
3414         if (version == NULL) {
3415                 currentversion(db, &version);
3416                 close_version = ISC_TRUE;
3417         }
3418
3419         search.rbtversion = version;
3420         search.serial = search.rbtversion->serial;
3421         search.options = options;
3422         search.copy_name = ISC_FALSE;
3423         search.need_cleanup = ISC_FALSE;
3424         search.wild = ISC_FALSE;
3425         search.zonecut = NULL;
3426         dns_fixedname_init(&search.zonecut_name);
3427         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3428         search.now = 0;
3429
3430         /*
3431          * 'wild' will be true iff. we've matched a wildcard.
3432          */
3433         wild = ISC_FALSE;
3434
3435         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3436
3437         /*
3438          * Search down from the root of the tree.  If, while going down, we
3439          * encounter a callback node, zone_zonecut_callback() will search the
3440          * rdatasets at the zone cut for active DNAME or NS rdatasets.
3441          */
3442         tree =  (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3443                                                          search.rbtdb->tree;
3444         result = dns_rbt_findnode(tree, name, foundname, &node,
3445                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
3446                                   zone_zonecut_callback, &search);
3447
3448         if (result == DNS_R_PARTIALMATCH) {
3449         partial_match:
3450                 if (search.zonecut != NULL) {
3451                     result = setup_delegation(&search, nodep, foundname,
3452                                               rdataset, sigrdataset);
3453                     goto tree_exit;
3454                 }
3455
3456                 if (search.wild) {
3457                         /*
3458                          * At least one of the levels in the search chain
3459                          * potentially has a wildcard.  For each such level,
3460                          * we must see if there's a matching wildcard active
3461                          * in the current version.
3462                          */
3463                         result = find_wildcard(&search, &node, name);
3464                         if (result == ISC_R_SUCCESS) {
3465                                 result = dns_name_copy(name, foundname, NULL);
3466                                 if (result != ISC_R_SUCCESS)
3467                                         goto tree_exit;
3468                                 wild = ISC_TRUE;
3469                                 goto found;
3470                         }
3471                         else if (result != ISC_R_NOTFOUND)
3472                                 goto tree_exit;
3473                 }
3474
3475                 chain = search.chain;
3476                 active = activeempty(&search, &chain, name);
3477
3478                 /*
3479                  * If we're here, then the name does not exist, is not
3480                  * beneath a zonecut, and there's no matching wildcard.
3481                  */
3482                 if ((search.rbtversion->secure == dns_db_secure &&
3483                      !search.rbtversion->havensec3) ||
3484                     (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3485                     (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3486                 {
3487                         result = find_closest_nsec(&search, nodep, foundname,
3488                                                    rdataset, sigrdataset, tree,
3489                                                    search.rbtversion->secure);
3490                         if (result == ISC_R_SUCCESS)
3491                                 result = active ? DNS_R_EMPTYNAME :
3492                                                   DNS_R_NXDOMAIN;
3493                 } else
3494                         result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3495                 goto tree_exit;
3496         } else if (result != ISC_R_SUCCESS)
3497                 goto tree_exit;
3498
3499  found:
3500         /*
3501          * We have found a node whose name is the desired name, or we
3502          * have matched a wildcard.
3503          */
3504
3505         if (search.zonecut != NULL) {
3506                 /*
3507                  * If we're beneath a zone cut, we don't want to look for
3508                  * CNAMEs because they're not legitimate zone glue.
3509                  */
3510                 cname_ok = ISC_FALSE;
3511         } else {
3512                 /*
3513                  * The node may be a zone cut itself.  If it might be one,
3514                  * make sure we check for it later.
3515                  *
3516                  * DS records live above the zone cut in ordinary zones so
3517                  * we want to ignore any referral.
3518                  *
3519                  * Stub zones don't have anything "above" the delegation so
3520                  * we always return a referral.
3521                  */
3522                 if (node->find_callback &&
3523                     ((node != search.rbtdb->origin_node &&
3524                       !dns_rdatatype_atparent(type)) ||
3525                      IS_STUB(search.rbtdb)))
3526                         maybe_zonecut = ISC_TRUE;
3527         }
3528
3529         /*
3530          * Certain DNSSEC types are not subject to CNAME matching
3531          * (RFC4035, section 2.5 and RFC3007).
3532          *
3533          * We don't check for RRSIG, because we don't store RRSIG records
3534          * directly.
3535          */
3536         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3537                 cname_ok = ISC_FALSE;
3538
3539         /*
3540          * We now go looking for rdata...
3541          */
3542
3543         lock = &search.rbtdb->node_locks[node->locknum].lock;
3544         NODE_LOCK(lock, isc_rwlocktype_read);
3545
3546         found = NULL;
3547         foundsig = NULL;
3548         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3549         nsecheader = NULL;
3550         nsecsig = NULL;
3551         cnamesig = NULL;
3552         empty_node = ISC_TRUE;
3553         for (header = node->data; header != NULL; header = header_next) {
3554                 header_next = header->next;
3555                 /*
3556                  * Look for an active, extant rdataset.
3557                  */
3558                 do {
3559                         if (header->serial <= search.serial &&
3560                             !IGNORE(header)) {
3561                                 /*
3562                                  * Is this a "this rdataset doesn't
3563                                  * exist" record?
3564                                  */
3565                                 if (NONEXISTENT(header))
3566                                         header = NULL;
3567                                 break;
3568                         } else
3569                                 header = header->down;
3570                 } while (header != NULL);
3571                 if (header != NULL) {
3572                         /*
3573                          * We now know that there is at least one active
3574                          * rdataset at this node.
3575                          */
3576                         empty_node = ISC_FALSE;
3577
3578                         /*
3579                          * Do special zone cut handling, if requested.
3580                          */
3581                         if (maybe_zonecut &&
3582                             header->type == dns_rdatatype_ns) {
3583                                 /*
3584                                  * We increment the reference count on node to
3585                                  * ensure that search->zonecut_rdataset will
3586                                  * still be valid later.
3587                                  */
3588                                 new_reference(search.rbtdb, node);
3589                                 search.zonecut = node;
3590                                 search.zonecut_rdataset = header;
3591                                 search.zonecut_sigrdataset = NULL;
3592                                 search.need_cleanup = ISC_TRUE;
3593                                 maybe_zonecut = ISC_FALSE;
3594                                 at_zonecut = ISC_TRUE;
3595                                 /*
3596                                  * It is not clear if KEY should still be
3597                                  * allowed at the parent side of the zone
3598                                  * cut or not.  It is needed for RFC3007
3599                                  * validated updates.
3600                                  */
3601                                 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3602                                     && type != dns_rdatatype_nsec
3603                                     && type != dns_rdatatype_key) {
3604                                         /*
3605                                          * Glue is not OK, but any answer we
3606                                          * could return would be glue.  Return
3607                                          * the delegation.
3608                                          */
3609                                         found = NULL;
3610                                         break;
3611                                 }
3612                                 if (found != NULL && foundsig != NULL)
3613                                         break;
3614                         }
3615
3616
3617                         /*
3618                          * If the NSEC3 record doesn't match the chain
3619                          * we are using behave as if it isn't here.
3620                          */
3621                         if (header->type == dns_rdatatype_nsec3 &&
3622                            !matchparams(header, &search)) {
3623                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3624                                 goto partial_match;
3625                         }
3626                         /*
3627                          * If we found a type we were looking for,
3628                          * remember it.
3629                          */
3630                         if (header->type == type ||
3631                             type == dns_rdatatype_any ||
3632                             (header->type == dns_rdatatype_cname &&
3633                              cname_ok)) {
3634                                 /*
3635                                  * We've found the answer!
3636                                  */
3637                                 found = header;
3638                                 if (header->type == dns_rdatatype_cname &&
3639                                     cname_ok) {
3640                                         /*
3641                                          * We may be finding a CNAME instead
3642                                          * of the desired type.
3643                                          *
3644                                          * If we've already got the CNAME RRSIG,
3645                                          * use it, otherwise change sigtype
3646                                          * so that we find it.
3647                                          */
3648                                         if (cnamesig != NULL)
3649                                                 foundsig = cnamesig;
3650                                         else
3651                                                 sigtype =
3652                                                     RBTDB_RDATATYPE_SIGCNAME;
3653                                 }
3654                                 /*
3655                                  * If we've got all we need, end the search.
3656                                  */
3657                                 if (!maybe_zonecut && foundsig != NULL)
3658                                         break;
3659                         } else if (header->type == sigtype) {
3660                                 /*
3661                                  * We've found the RRSIG rdataset for our
3662                                  * target type.  Remember it.
3663                                  */
3664                                 foundsig = header;
3665                                 /*
3666                                  * If we've got all we need, end the search.
3667                                  */
3668                                 if (!maybe_zonecut && found != NULL)
3669                                         break;
3670                         } else if (header->type == dns_rdatatype_nsec &&
3671                                    !search.rbtversion->havensec3) {
3672                                 /*
3673                                  * Remember a NSEC rdataset even if we're
3674                                  * not specifically looking for it, because
3675                                  * we might need it later.
3676                                  */
3677                                 nsecheader = header;
3678                         } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3679                                    !search.rbtversion->havensec3) {
3680                                 /*
3681                                  * If we need the NSEC rdataset, we'll also
3682                                  * need its signature.
3683                                  */
3684                                 nsecsig = header;
3685                         } else if (cname_ok &&
3686                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
3687                                 /*
3688                                  * If we get a CNAME match, we'll also need
3689                                  * its signature.
3690                                  */
3691                                 cnamesig = header;
3692                         }
3693                 }
3694         }
3695
3696         if (empty_node) {
3697                 /*
3698                  * We have an exact match for the name, but there are no
3699                  * active rdatasets in the desired version.  That means that
3700                  * this node doesn't exist in the desired version, and that
3701                  * we really have a partial match.
3702                  */
3703                 if (!wild) {
3704                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3705                         goto partial_match;
3706                 }
3707         }
3708
3709         /*
3710          * If we didn't find what we were looking for...
3711          */
3712         if (found == NULL) {
3713                 if (search.zonecut != NULL) {
3714                         /*
3715                          * We were trying to find glue at a node beneath a
3716                          * zone cut, but didn't.
3717                          *
3718                          * Return the delegation.
3719                          */
3720                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3721                         result = setup_delegation(&search, nodep, foundname,
3722                                                   rdataset, sigrdataset);
3723                         goto tree_exit;
3724                 }
3725                 /*
3726                  * The desired type doesn't exist.
3727                  */
3728                 result = DNS_R_NXRRSET;
3729                 if (search.rbtversion->secure == dns_db_secure &&
3730                     !search.rbtversion->havensec3 &&
3731                     (nsecheader == NULL || nsecsig == NULL)) {
3732                         /*
3733                          * The zone is secure but there's no NSEC,
3734                          * or the NSEC has no signature!
3735                          */
3736                         if (!wild) {
3737                                 result = DNS_R_BADDB;
3738                                 goto node_exit;
3739                         }
3740
3741                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3742                         result = find_closest_nsec(&search, nodep, foundname,
3743                                                    rdataset, sigrdataset,
3744                                                    search.rbtdb->tree,
3745                                                    search.rbtversion->secure);
3746                         if (result == ISC_R_SUCCESS)
3747                                 result = DNS_R_EMPTYWILD;
3748                         goto tree_exit;
3749                 }
3750                 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3751                     nsecheader == NULL)
3752                 {
3753                         /*
3754                          * There's no NSEC record, and we were told
3755                          * to find one.
3756                          */
3757                         result = DNS_R_BADDB;
3758                         goto node_exit;
3759                 }
3760                 if (nodep != NULL) {
3761                         new_reference(search.rbtdb, node);
3762                         *nodep = node;
3763                 }
3764                 if ((search.rbtversion->secure == dns_db_secure &&
3765                      !search.rbtversion->havensec3) ||
3766                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
3767                 {
3768                         bind_rdataset(search.rbtdb, node, nsecheader,
3769                                       0, rdataset);
3770                         if (nsecsig != NULL)
3771                                 bind_rdataset(search.rbtdb, node,
3772                                               nsecsig, 0, sigrdataset);
3773                 }
3774                 if (wild)
3775                         foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3776                 goto node_exit;
3777         }
3778
3779         /*
3780          * We found what we were looking for, or we found a CNAME.
3781          */
3782
3783         if (type != found->type &&
3784             type != dns_rdatatype_any &&
3785             found->type == dns_rdatatype_cname) {
3786                 /*
3787                  * We weren't doing an ANY query and we found a CNAME instead
3788                  * of the type we were looking for, so we need to indicate
3789                  * that result to the caller.
3790                  */
3791                 result = DNS_R_CNAME;
3792         } else if (search.zonecut != NULL) {
3793                 /*
3794                  * If we're beneath a zone cut, we must indicate that the
3795                  * result is glue, unless we're actually at the zone cut
3796                  * and the type is NSEC or KEY.
3797                  */
3798                 if (search.zonecut == node) {
3799                         /*
3800                          * It is not clear if KEY should still be
3801                          * allowed at the parent side of the zone
3802                          * cut or not.  It is needed for RFC3007
3803                          * validated updates.
3804                          */
3805                         if (type == dns_rdatatype_nsec ||
3806                             type == dns_rdatatype_nsec3 ||
3807                             type == dns_rdatatype_key)
3808                                 result = ISC_R_SUCCESS;
3809                         else if (type == dns_rdatatype_any)
3810                                 result = DNS_R_ZONECUT;
3811                         else
3812                                 result = DNS_R_GLUE;
3813                 } else
3814                         result = DNS_R_GLUE;
3815                 /*
3816                  * We might have found data that isn't glue, but was occluded
3817                  * by a dynamic update.  If the caller cares about this, they
3818                  * will have told us to validate glue.
3819                  *
3820                  * XXX We should cache the glue validity state!
3821                  */
3822                 if (result == DNS_R_GLUE &&
3823                     (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
3824                     !valid_glue(&search, foundname, type, node)) {
3825                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3826                         result = setup_delegation(&search, nodep, foundname,
3827                                                   rdataset, sigrdataset);
3828                     goto tree_exit;
3829                 }
3830         } else {
3831                 /*
3832                  * An ordinary successful query!
3833                  */
3834                 result = ISC_R_SUCCESS;
3835         }
3836
3837         if (nodep != NULL) {
3838                 if (!at_zonecut)
3839                         new_reference(search.rbtdb, node);
3840                 else
3841                         search.need_cleanup = ISC_FALSE;
3842                 *nodep = node;
3843         }
3844
3845         if (type != dns_rdatatype_any) {
3846                 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
3847                 if (foundsig != NULL)
3848                         bind_rdataset(search.rbtdb, node, foundsig, 0,
3849                                       sigrdataset);
3850         }
3851
3852         if (wild)
3853                 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3854
3855  node_exit:
3856         NODE_UNLOCK(lock, isc_rwlocktype_read);
3857
3858  tree_exit:
3859         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3860
3861         /*
3862          * If we found a zonecut but aren't going to use it, we have to
3863          * let go of it.
3864          */
3865         if (search.need_cleanup) {
3866                 node = search.zonecut;
3867                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3868
3869                 NODE_LOCK(lock, isc_rwlocktype_read);
3870                 decrement_reference(search.rbtdb, node, 0,
3871                                     isc_rwlocktype_read, isc_rwlocktype_none,
3872                                     ISC_FALSE);
3873                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3874         }
3875
3876         if (close_version)
3877                 closeversion(db, &version, ISC_FALSE);
3878
3879         dns_rbtnodechain_reset(&search.chain);
3880
3881         return (result);
3882 }
3883
3884 static isc_result_t
3885 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
3886                  isc_stdtime_t now, dns_dbnode_t **nodep,
3887                  dns_name_t *foundname,
3888                  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3889 {
3890         UNUSED(db);
3891         UNUSED(name);
3892         UNUSED(options);
3893         UNUSED(now);
3894         UNUSED(nodep);
3895         UNUSED(foundname);
3896         UNUSED(rdataset);
3897         UNUSED(sigrdataset);
3898
3899         FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
3900
3901         return (ISC_R_NOTIMPLEMENTED);
3902 }
3903
3904 static isc_result_t
3905 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
3906         rbtdb_search_t *search = arg;
3907         rdatasetheader_t *header, *header_prev, *header_next;
3908         rdatasetheader_t *dname_header, *sigdname_header;
3909         isc_result_t result;
3910         nodelock_t *lock;
3911         isc_rwlocktype_t locktype;
3912
3913         /* XXX comment */
3914
3915         REQUIRE(search->zonecut == NULL);
3916
3917         /*
3918          * Keep compiler silent.
3919          */
3920         UNUSED(name);
3921
3922         lock = &(search->rbtdb->node_locks[node->locknum].lock);
3923         locktype = isc_rwlocktype_read;
3924         NODE_LOCK(lock, locktype);
3925
3926         /*
3927          * Look for a DNAME or RRSIG DNAME rdataset.
3928          */
3929         dname_header = NULL;
3930         sigdname_header = NULL;
3931         header_prev = NULL;
3932         for (header = node->data; header != NULL; header = header_next) {
3933                 header_next = header->next;
3934                 if (header->rdh_ttl <= search->now) {
3935                         /*
3936                          * This rdataset is stale.  If no one else is
3937                          * using the node, we can clean it up right
3938                          * now, otherwise we mark it as stale, and
3939                          * the node as dirty, so it will get cleaned
3940                          * up later.
3941                          */
3942                         if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
3943                             (locktype == isc_rwlocktype_write ||
3944                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3945                                 /*
3946                                  * We update the node's status only when we
3947                                  * can get write access; otherwise, we leave
3948                                  * others to this work.  Periodical cleaning
3949                                  * will eventually take the job as the last
3950                                  * resort.
3951                                  * We won't downgrade the lock, since other
3952                                  * rdatasets are probably stale, too.
3953                                  */
3954                                 locktype = isc_rwlocktype_write;
3955
3956                                 if (dns_rbtnode_refcurrent(node) == 0) {
3957                                         isc_mem_t *mctx;
3958
3959                                         /*
3960                                          * header->down can be non-NULL if the
3961                                          * refcount has just decremented to 0
3962                                          * but decrement_reference() has not
3963                                          * performed clean_cache_node(), in
3964                                          * which case we need to purge the
3965                                          * stale headers first.
3966                                          */
3967                                         mctx = search->rbtdb->common.mctx;
3968                                         clean_stale_headers(search->rbtdb,
3969                                                             mctx,
3970                                                             header);
3971                                         if (header_prev != NULL)
3972                                                 header_prev->next =
3973                                                         header->next;
3974                                         else
3975                                                 node->data = header->next;
3976                                         free_rdataset(search->rbtdb, mctx,
3977                                                       header);
3978                                 } else {
3979                                         header->attributes |=
3980                                                 RDATASET_ATTR_STALE;
3981                                         node->dirty = 1;
3982                                         header_prev = header;
3983                                 }
3984                         } else
3985                                 header_prev = header;
3986                 } else if (header->type == dns_rdatatype_dname &&
3987                            EXISTS(header)) {
3988                         dname_header = header;
3989                         header_prev = header;
3990                 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
3991                          EXISTS(header)) {
3992                         sigdname_header = header;
3993                         header_prev = header;
3994                 } else
3995                         header_prev = header;
3996         }
3997
3998         if (dname_header != NULL &&
3999             (!DNS_TRUST_PENDING(dname_header->trust) ||
4000              (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4001                 /*
4002                  * We increment the reference count on node to ensure that
4003                  * search->zonecut_rdataset will still be valid later.
4004                  */
4005                 new_reference(search->rbtdb, node);
4006                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4007                 search->zonecut = node;
4008                 search->zonecut_rdataset = dname_header;
4009                 search->zonecut_sigrdataset = sigdname_header;
4010                 search->need_cleanup = ISC_TRUE;
4011                 result = DNS_R_PARTIALMATCH;
4012         } else
4013                 result = DNS_R_CONTINUE;
4014
4015         NODE_UNLOCK(lock, locktype);
4016
4017         return (result);
4018 }
4019
4020 static inline isc_result_t
4021 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4022                      dns_dbnode_t **nodep, dns_name_t *foundname,
4023                      dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4024 {
4025         unsigned int i;
4026         dns_rbtnode_t *level_node;
4027         rdatasetheader_t *header, *header_prev, *header_next;
4028         rdatasetheader_t *found, *foundsig;
4029         isc_result_t result = ISC_R_NOTFOUND;
4030         dns_name_t name;
4031         dns_rbtdb_t *rbtdb;
4032         isc_boolean_t done;
4033         nodelock_t *lock;
4034         isc_rwlocktype_t locktype;
4035
4036         /*
4037          * Caller must be holding the tree lock.
4038          */
4039
4040         rbtdb = search->rbtdb;
4041         i = search->chain.level_matches;
4042         done = ISC_FALSE;
4043         do {
4044                 locktype = isc_rwlocktype_read;
4045                 lock = &rbtdb->node_locks[node->locknum].lock;
4046                 NODE_LOCK(lock, locktype);
4047
4048                 /*
4049                  * Look for NS and RRSIG NS rdatasets.
4050                  */
4051                 found = NULL;
4052                 foundsig = NULL;
4053                 header_prev = NULL;
4054                 for (header = node->data;
4055                      header != NULL;
4056                      header = header_next) {
4057                         header_next = header->next;
4058                         if (header->rdh_ttl <= search->now) {
4059                                 /*
4060                                  * This rdataset is stale.  If no one else is
4061                                  * using the node, we can clean it up right
4062                                  * now, otherwise we mark it as stale, and
4063                                  * the node as dirty, so it will get cleaned
4064                                  * up later.
4065                                  */
4066                                 if ((header->rdh_ttl <= search->now -
4067                                                     RBTDB_VIRTUAL) &&
4068                                     (locktype == isc_rwlocktype_write ||
4069                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4070                                         /*
4071                                          * We update the node's status only
4072                                          * when we can get write access.
4073                                          */
4074                                         locktype = isc_rwlocktype_write;
4075
4076                                         if (dns_rbtnode_refcurrent(node)
4077                                             == 0) {
4078                                                 isc_mem_t *m;
4079
4080                                                 m = search->rbtdb->common.mctx;
4081                                                 clean_stale_headers(
4082                                                         search->rbtdb,
4083                                                         m, header);
4084                                                 if (header_prev != NULL)
4085                                                         header_prev->next =
4086                                                                 header->next;
4087                                                 else
4088                                                         node->data =
4089                                                                 header->next;
4090                                                 free_rdataset(rbtdb, m,
4091                                                               header);
4092                                         } else {
4093                                                 header->attributes |=
4094                                                         RDATASET_ATTR_STALE;
4095                                                 node->dirty = 1;
4096                                                 header_prev = header;
4097                                         }
4098                                 } else
4099                                         header_prev = header;
4100                         } else if (EXISTS(header)) {
4101                                 /*
4102                                  * We've found an extant rdataset.  See if
4103                                  * we're interested in it.
4104                                  */
4105                                 if (header->type == dns_rdatatype_ns) {
4106                                         found = header;
4107                                         if (foundsig != NULL)
4108                                                 break;
4109                                 } else if (header->type ==
4110                                            RBTDB_RDATATYPE_SIGNS) {
4111                                         foundsig = header;
4112                                         if (found != NULL)
4113                                                 break;
4114                                 }
4115                                 header_prev = header;
4116                         } else
4117                                 header_prev = header;
4118                 }
4119
4120                 if (found != NULL) {
4121                         /*
4122                          * If we have to set foundname, we do it before
4123                          * anything else.  If we were to set foundname after
4124                          * we had set nodep or bound the rdataset, then we'd
4125                          * have to undo that work if dns_name_concatenate()
4126                          * failed.  By setting foundname first, there's
4127                          * nothing to undo if we have trouble.
4128                          */
4129                         if (foundname != NULL) {
4130                                 dns_name_init(&name, NULL);
4131                                 dns_rbt_namefromnode(node, &name);
4132                                 result = dns_name_copy(&name, foundname, NULL);
4133                                 while (result == ISC_R_SUCCESS && i > 0) {
4134                                         i--;
4135                                         level_node = search->chain.levels[i];
4136                                         dns_name_init(&name, NULL);
4137                                         dns_rbt_namefromnode(level_node,
4138                                                              &name);
4139                                         result =
4140                                                 dns_name_concatenate(foundname,
4141                                                                      &name,
4142                                                                      foundname,
4143                                                                      NULL);
4144                                 }
4145                                 if (result != ISC_R_SUCCESS) {
4146                                         *nodep = NULL;
4147                                         goto node_exit;
4148                                 }
4149                         }
4150                         result = DNS_R_DELEGATION;
4151                         if (nodep != NULL) {
4152                                 new_reference(search->rbtdb, node);
4153                                 *nodep = node;
4154                         }
4155                         bind_rdataset(search->rbtdb, node, found, search->now,
4156                                       rdataset);
4157                         if (foundsig != NULL)
4158                                 bind_rdataset(search->rbtdb, node, foundsig,
4159                                               search->now, sigrdataset);
4160                         if (need_headerupdate(found, search->now) ||
4161                             (foundsig != NULL &&
4162                              need_headerupdate(foundsig, search->now))) {
4163                                 if (locktype != isc_rwlocktype_write) {
4164                                         NODE_UNLOCK(lock, locktype);
4165                                         NODE_LOCK(lock, isc_rwlocktype_write);
4166                                         locktype = isc_rwlocktype_write;
4167                                 }
4168                                 if (need_headerupdate(found, search->now))
4169                                         update_header(search->rbtdb, found,
4170                                                       search->now);
4171                                 if (foundsig != NULL &&
4172                                     need_headerupdate(foundsig, search->now)) {
4173                                         update_header(search->rbtdb, foundsig,
4174                                                       search->now);
4175                                 }
4176                         }
4177                 }
4178
4179         node_exit:
4180                 NODE_UNLOCK(lock, locktype);
4181
4182                 if (found == NULL && i > 0) {
4183                         i--;
4184                         node = search->chain.levels[i];
4185                 } else
4186                         done = ISC_TRUE;
4187
4188         } while (!done);
4189
4190         return (result);
4191 }
4192
4193 static isc_result_t
4194 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4195                   isc_stdtime_t now, dns_name_t *foundname,
4196                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4197 {
4198         dns_rbtnode_t *node;
4199         rdatasetheader_t *header, *header_next, *header_prev;
4200         rdatasetheader_t *found, *foundsig;
4201         isc_boolean_t empty_node;
4202         isc_result_t result;
4203         dns_fixedname_t fname, forigin;
4204         dns_name_t *name, *origin;
4205         rbtdb_rdatatype_t matchtype, sigmatchtype;
4206         nodelock_t *lock;
4207         isc_rwlocktype_t locktype;
4208
4209         matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4210         sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4211                                              dns_rdatatype_nsec);
4212
4213         do {
4214                 node = NULL;
4215                 dns_fixedname_init(&fname);
4216                 name = dns_fixedname_name(&fname);
4217                 dns_fixedname_init(&forigin);
4218                 origin = dns_fixedname_name(&forigin);
4219                 result = dns_rbtnodechain_current(&search->chain, name,
4220                                                   origin, &node);
4221                 if (result != ISC_R_SUCCESS)
4222                         return (result);
4223                 locktype = isc_rwlocktype_read;
4224                 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4225                 NODE_LOCK(lock, locktype);
4226                 found = NULL;
4227                 foundsig = NULL;
4228                 empty_node = ISC_TRUE;
4229                 header_prev = NULL;
4230                 for (header = node->data;
4231                      header != NULL;
4232                      header = header_next) {
4233                         header_next = header->next;
4234                         if (header->rdh_ttl <= now) {
4235                                 /*
4236                                  * This rdataset is stale.  If no one else is
4237                                  * using the node, we can clean it up right
4238                                  * now, otherwise we mark it as stale, and the
4239                                  * node as dirty, so it will get cleaned up
4240                                  * later.
4241                                  */
4242                                 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4243                                     (locktype == isc_rwlocktype_write ||
4244                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4245                                         /*
4246                                          * We update the node's status only
4247                                          * when we can get write access.
4248                                          */
4249                                         locktype = isc_rwlocktype_write;
4250
4251                                         if (dns_rbtnode_refcurrent(node)
4252                                             == 0) {
4253                                                 isc_mem_t *m;
4254
4255                                                 m = search->rbtdb->common.mctx;
4256                                                 clean_stale_headers(
4257                                                         search->rbtdb,
4258                                                         m, header);
4259                                                 if (header_prev != NULL)
4260                                                         header_prev->next =
4261                                                                 header->next;
4262                                                 else
4263                                                         node->data = header->next;
4264                                                 free_rdataset(search->rbtdb, m,
4265                                                               header);
4266                                         } else {
4267                                                 header->attributes |=
4268                                                         RDATASET_ATTR_STALE;
4269                                                 node->dirty = 1;
4270                                                 header_prev = header;
4271                                         }
4272                                 } else
4273                                         header_prev = header;
4274                                 continue;
4275                         }
4276                         if (NONEXISTENT(header) ||
4277                             RBTDB_RDATATYPE_BASE(header->type) == 0) {
4278                                 header_prev = header;
4279                                 continue;
4280                         }
4281                         empty_node = ISC_FALSE;
4282                         if (header->type == matchtype)
4283                                 found = header;
4284                         else if (header->type == sigmatchtype)
4285                                 foundsig = header;
4286                         header_prev = header;
4287                 }
4288                 if (found != NULL) {
4289                         result = dns_name_concatenate(name, origin,
4290                                                       foundname, NULL);
4291                         if (result != ISC_R_SUCCESS)
4292                                 goto unlock_node;
4293                         bind_rdataset(search->rbtdb, node, found,
4294                                       now, rdataset);
4295                         if (foundsig != NULL)
4296                                 bind_rdataset(search->rbtdb, node, foundsig,
4297                                               now, sigrdataset);
4298                         new_reference(search->rbtdb, node);
4299                         *nodep = node;
4300                         result = DNS_R_COVERINGNSEC;
4301                 } else if (!empty_node) {
4302                         result = ISC_R_NOTFOUND;
4303                 } else
4304                         result = dns_rbtnodechain_prev(&search->chain, NULL,
4305                                                        NULL);
4306  unlock_node:
4307                 NODE_UNLOCK(lock, locktype);
4308         } while (empty_node && result == ISC_R_SUCCESS);
4309         return (result);
4310 }
4311
4312 static isc_result_t
4313 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4314            dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4315            dns_dbnode_t **nodep, dns_name_t *foundname,
4316            dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4317 {
4318         dns_rbtnode_t *node = NULL;
4319         isc_result_t result;
4320         rbtdb_search_t search;
4321         isc_boolean_t cname_ok = ISC_TRUE;
4322         isc_boolean_t empty_node;
4323         nodelock_t *lock;
4324         isc_rwlocktype_t locktype;
4325         rdatasetheader_t *header, *header_prev, *header_next;
4326         rdatasetheader_t *found, *nsheader;
4327         rdatasetheader_t *foundsig, *nssig, *cnamesig;
4328         rdatasetheader_t *update, *updatesig;
4329         rbtdb_rdatatype_t sigtype, negtype;
4330
4331         UNUSED(version);
4332
4333         search.rbtdb = (dns_rbtdb_t *)db;
4334
4335         REQUIRE(VALID_RBTDB(search.rbtdb));
4336         REQUIRE(version == NULL);
4337
4338         if (now == 0)
4339                 isc_stdtime_get(&now);
4340
4341         search.rbtversion = NULL;
4342         search.serial = 1;
4343         search.options = options;
4344         search.copy_name = ISC_FALSE;
4345         search.need_cleanup = ISC_FALSE;
4346         search.wild = ISC_FALSE;
4347         search.zonecut = NULL;
4348         dns_fixedname_init(&search.zonecut_name);
4349         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4350         search.now = now;
4351         update = NULL;
4352         updatesig = NULL;
4353
4354         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4355
4356         /*
4357          * Search down from the root of the tree.  If, while going down, we
4358          * encounter a callback node, cache_zonecut_callback() will search the
4359          * rdatasets at the zone cut for a DNAME rdataset.
4360          */
4361         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4362                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
4363                                   cache_zonecut_callback, &search);
4364
4365         if (result == DNS_R_PARTIALMATCH) {
4366                 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4367                         result = find_coveringnsec(&search, nodep, now,
4368                                                    foundname, rdataset,
4369                                                    sigrdataset);
4370                         if (result == DNS_R_COVERINGNSEC)
4371                                 goto tree_exit;
4372                 }
4373                 if (search.zonecut != NULL) {
4374                     result = setup_delegation(&search, nodep, foundname,
4375                                               rdataset, sigrdataset);
4376                     goto tree_exit;
4377                 } else {
4378                 find_ns:
4379                         result = find_deepest_zonecut(&search, node, nodep,
4380                                                       foundname, rdataset,
4381                                                       sigrdataset);
4382                         goto tree_exit;
4383                 }
4384         } else if (result != ISC_R_SUCCESS)
4385                 goto tree_exit;
4386
4387         /*
4388          * Certain DNSSEC types are not subject to CNAME matching
4389          * (RFC4035, section 2.5 and RFC3007).
4390          *
4391          * We don't check for RRSIG, because we don't store RRSIG records
4392          * directly.
4393          */
4394         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4395                 cname_ok = ISC_FALSE;
4396
4397         /*
4398          * We now go looking for rdata...
4399          */
4400
4401         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4402         locktype = isc_rwlocktype_read;
4403         NODE_LOCK(lock, locktype);
4404
4405         found = NULL;
4406         foundsig = NULL;
4407         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4408         negtype = RBTDB_RDATATYPE_VALUE(0, type);
4409         nsheader = NULL;
4410         nssig = NULL;
4411         cnamesig = NULL;
4412         empty_node = ISC_TRUE;
4413         header_prev = NULL;
4414         for (header = node->data; header != NULL; header = header_next) {
4415                 header_next = header->next;
4416                 if (header->rdh_ttl <= now) {
4417                         /*
4418                          * This rdataset is stale.  If no one else is using the
4419                          * node, we can clean it up right now, otherwise we
4420                          * mark it as stale, and the node as dirty, so it will
4421                          * get cleaned up later.
4422                          */
4423                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4424                             (locktype == isc_rwlocktype_write ||
4425                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4426                                 /*
4427                                  * We update the node's status only when we
4428                                  * can get write access.
4429                                  */
4430                                 locktype = isc_rwlocktype_write;
4431
4432                                 if (dns_rbtnode_refcurrent(node) == 0) {
4433                                         isc_mem_t *mctx;
4434
4435                                         mctx = search.rbtdb->common.mctx;
4436                                         clean_stale_headers(search.rbtdb, mctx,
4437                                                             header);
4438                                         if (header_prev != NULL)
4439                                                 header_prev->next =
4440                                                         header->next;
4441                                         else
4442                                                 node->data = header->next;
4443                                         free_rdataset(search.rbtdb, mctx,
4444                                                       header);
4445                                 } else {
4446                                         header->attributes |=
4447                                                 RDATASET_ATTR_STALE;
4448                                         node->dirty = 1;
4449                                         header_prev = header;
4450                                 }
4451                         } else
4452                                 header_prev = header;
4453                 } else if (EXISTS(header)) {
4454                         /*
4455                          * We now know that there is at least one active
4456                          * non-stale rdataset at this node.
4457                          */
4458                         empty_node = ISC_FALSE;
4459
4460                         /*
4461                          * If we found a type we were looking for, remember
4462                          * it.
4463                          */
4464                         if (header->type == type ||
4465                             (type == dns_rdatatype_any &&
4466                              RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4467                             (cname_ok && header->type ==
4468                              dns_rdatatype_cname)) {
4469                                 /*
4470                                  * We've found the answer.
4471                                  */
4472                                 found = header;
4473                                 if (header->type == dns_rdatatype_cname &&
4474                                     cname_ok &&
4475                                     cnamesig != NULL) {
4476                                         /*
4477                                          * If we've already got the CNAME RRSIG,
4478                                          * use it, otherwise change sigtype
4479                                          * so that we find it.
4480                                          */
4481                                         if (cnamesig != NULL)
4482                                                 foundsig = cnamesig;
4483                                         else
4484                                                 sigtype =
4485                                                     RBTDB_RDATATYPE_SIGCNAME;
4486                                         foundsig = cnamesig;
4487                                 }
4488                         } else if (header->type == sigtype) {
4489                                 /*
4490                                  * We've found the RRSIG rdataset for our
4491                                  * target type.  Remember it.
4492                                  */
4493                                 foundsig = header;
4494                         } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4495                                    header->type == negtype) {
4496                                 /*
4497                                  * We've found a negative cache entry.
4498                                  */
4499                                 found = header;
4500                         } else if (header->type == dns_rdatatype_ns) {
4501                                 /*
4502                                  * Remember a NS rdataset even if we're
4503                                  * not specifically looking for it, because
4504                                  * we might need it later.
4505                                  */
4506                                 nsheader = header;
4507                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4508                                 /*
4509                                  * If we need the NS rdataset, we'll also
4510                                  * need its signature.
4511                                  */
4512                                 nssig = header;
4513                         } else if (cname_ok &&
4514                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
4515                                 /*
4516                                  * If we get a CNAME match, we'll also need
4517                                  * its signature.
4518                                  */
4519                                 cnamesig = header;
4520                         }
4521                         header_prev = header;
4522                 } else
4523                         header_prev = header;
4524         }
4525
4526         if (empty_node) {
4527                 /*
4528                  * We have an exact match for the name, but there are no
4529                  * extant rdatasets.  That means that this node doesn't
4530                  * meaningfully exist, and that we really have a partial match.
4531                  */
4532                 NODE_UNLOCK(lock, locktype);
4533                 goto find_ns;
4534         }
4535
4536         /*
4537          * If we didn't find what we were looking for...
4538          */
4539         if (found == NULL ||
4540             (DNS_TRUST_ADDITIONAL(found->trust) &&
4541              ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4542             (found->trust == dns_trust_glue &&
4543              ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4544             (DNS_TRUST_PENDING(found->trust) &&
4545              ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4546                 /*
4547                  * If there is an NS rdataset at this node, then this is the
4548                  * deepest zone cut.
4549                  */
4550                 if (nsheader != NULL) {
4551                         if (nodep != NULL) {
4552                                 new_reference(search.rbtdb, node);
4553                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4554                                 *nodep = node;
4555                         }
4556                         bind_rdataset(search.rbtdb, node, nsheader, search.now,
4557                                       rdataset);
4558                         if (need_headerupdate(nsheader, search.now))
4559                                 update = nsheader;
4560                         if (nssig != NULL) {
4561                                 bind_rdataset(search.rbtdb, node, nssig,
4562                                               search.now, sigrdataset);
4563                                 if (need_headerupdate(nssig, search.now))
4564                                         updatesig = nssig;
4565                         }
4566                         result = DNS_R_DELEGATION;
4567                         goto node_exit;
4568                 }
4569
4570                 /*
4571                  * Go find the deepest zone cut.
4572                  */
4573                 NODE_UNLOCK(lock, locktype);
4574                 goto find_ns;
4575         }
4576
4577         /*
4578          * We found what we were looking for, or we found a CNAME.
4579          */
4580
4581         if (nodep != NULL) {
4582                 new_reference(search.rbtdb, node);
4583                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4584                 *nodep = node;
4585         }
4586
4587         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
4588                 /*
4589                  * We found a negative cache entry.
4590                  */
4591                 if (NXDOMAIN(found))
4592                         result = DNS_R_NCACHENXDOMAIN;
4593                 else
4594                         result = DNS_R_NCACHENXRRSET;
4595         } else if (type != found->type &&
4596                    type != dns_rdatatype_any &&
4597                    found->type == dns_rdatatype_cname) {
4598                 /*
4599                  * We weren't doing an ANY query and we found a CNAME instead
4600                  * of the type we were looking for, so we need to indicate
4601                  * that result to the caller.
4602                  */
4603                 result = DNS_R_CNAME;
4604         } else {
4605                 /*
4606                  * An ordinary successful query!
4607                  */
4608                 result = ISC_R_SUCCESS;
4609         }
4610
4611         if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
4612             result == DNS_R_NCACHENXRRSET) {
4613                 bind_rdataset(search.rbtdb, node, found, search.now,
4614                               rdataset);
4615                 if (need_headerupdate(found, search.now))
4616                         update = found;
4617                 if (foundsig != NULL) {
4618                         bind_rdataset(search.rbtdb, node, foundsig, search.now,
4619                                       sigrdataset);
4620                         if (need_headerupdate(foundsig, search.now))
4621                                 updatesig = foundsig;
4622                 }
4623         }
4624
4625  node_exit:
4626         if ((update != NULL || updatesig != NULL) &&
4627             locktype != isc_rwlocktype_write) {
4628                 NODE_UNLOCK(lock, locktype);
4629                 NODE_LOCK(lock, isc_rwlocktype_write);
4630                 locktype = isc_rwlocktype_write;
4631         }
4632         if (update != NULL && need_headerupdate(update, search.now))
4633                 update_header(search.rbtdb, update, search.now);
4634         if (updatesig != NULL && need_headerupdate(updatesig, search.now))
4635                 update_header(search.rbtdb, updatesig, search.now);
4636
4637         NODE_UNLOCK(lock, locktype);
4638
4639  tree_exit:
4640         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4641
4642         /*
4643          * If we found a zonecut but aren't going to use it, we have to
4644          * let go of it.
4645          */
4646         if (search.need_cleanup) {
4647                 node = search.zonecut;
4648                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4649
4650                 NODE_LOCK(lock, isc_rwlocktype_read);
4651                 decrement_reference(search.rbtdb, node, 0,
4652                                     isc_rwlocktype_read, isc_rwlocktype_none,
4653                                     ISC_FALSE);
4654                 NODE_UNLOCK(lock, isc_rwlocktype_read);
4655         }
4656
4657         dns_rbtnodechain_reset(&search.chain);
4658
4659         return (result);
4660 }
4661
4662 static isc_result_t
4663 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4664                   isc_stdtime_t now, dns_dbnode_t **nodep,
4665                   dns_name_t *foundname,
4666                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4667 {
4668         dns_rbtnode_t *node = NULL;
4669         nodelock_t *lock;
4670         isc_result_t result;
4671         rbtdb_search_t search;
4672         rdatasetheader_t *header, *header_prev, *header_next;
4673         rdatasetheader_t *found, *foundsig;
4674         unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
4675         isc_rwlocktype_t locktype;
4676
4677         search.rbtdb = (dns_rbtdb_t *)db;
4678
4679         REQUIRE(VALID_RBTDB(search.rbtdb));
4680
4681         if (now == 0)
4682                 isc_stdtime_get(&now);
4683
4684         search.rbtversion = NULL;
4685         search.serial = 1;
4686         search.options = options;
4687         search.copy_name = ISC_FALSE;
4688         search.need_cleanup = ISC_FALSE;
4689         search.wild = ISC_FALSE;
4690         search.zonecut = NULL;
4691         dns_fixedname_init(&search.zonecut_name);
4692         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4693         search.now = now;
4694
4695         if ((options & DNS_DBFIND_NOEXACT) != 0)
4696                 rbtoptions |= DNS_RBTFIND_NOEXACT;
4697
4698         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4699
4700         /*
4701          * Search down from the root of the tree.
4702          */
4703         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4704                                   &search.chain, rbtoptions, NULL, &search);
4705
4706         if (result == DNS_R_PARTIALMATCH) {
4707         find_ns:
4708                 result = find_deepest_zonecut(&search, node, nodep, foundname,
4709                                               rdataset, sigrdataset);
4710                 goto tree_exit;
4711         } else if (result != ISC_R_SUCCESS)
4712                 goto tree_exit;
4713
4714         /*
4715          * We now go looking for an NS rdataset at the node.
4716          */
4717
4718         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4719         locktype = isc_rwlocktype_read;
4720         NODE_LOCK(lock, locktype);
4721
4722         found = NULL;
4723         foundsig = NULL;
4724         header_prev = NULL;
4725         for (header = node->data; header != NULL; header = header_next) {
4726                 header_next = header->next;
4727                 if (header->rdh_ttl <= now) {
4728                         /*
4729                          * This rdataset is stale.  If no one else is using the
4730                          * node, we can clean it up right now, otherwise we
4731                          * mark it as stale, and the node as dirty, so it will
4732                          * get cleaned up later.
4733                          */
4734                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4735                             (locktype == isc_rwlocktype_write ||
4736                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4737                                 /*
4738                                  * We update the node's status only when we
4739                                  * can get write access.
4740                                  */
4741                                 locktype = isc_rwlocktype_write;
4742
4743                                 if (dns_rbtnode_refcurrent(node) == 0) {
4744                                         isc_mem_t *mctx;
4745
4746                                         mctx = search.rbtdb->common.mctx;
4747                                         clean_stale_headers(search.rbtdb, mctx,
4748                                                             header);
4749                                         if (header_prev != NULL)
4750                                                 header_prev->next =
4751                                                         header->next;
4752                                         else
4753                                                 node->data = header->next;
4754                                         free_rdataset(search.rbtdb, mctx,
4755                                                       header);
4756                                 } else {
4757                                         header->attributes |=
4758                                                 RDATASET_ATTR_STALE;
4759                                         node->dirty = 1;
4760                                         header_prev = header;
4761                                 }
4762                         } else
4763                                 header_prev = header;
4764                 } else if (EXISTS(header)) {
4765                         /*
4766                          * If we found a type we were looking for, remember
4767                          * it.
4768                          */
4769                         if (header->type == dns_rdatatype_ns) {
4770                                 /*
4771                                  * Remember a NS rdataset even if we're
4772                                  * not specifically looking for it, because
4773                                  * we might need it later.
4774                                  */
4775                                 found = header;
4776                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4777                                 /*
4778                                  * If we need the NS rdataset, we'll also
4779                                  * need its signature.
4780                                  */
4781                                 foundsig = header;
4782                         }
4783                         header_prev = header;
4784                 } else
4785                         header_prev = header;
4786         }
4787
4788         if (found == NULL) {
4789                 /*
4790                  * No NS records here.
4791                  */
4792                 NODE_UNLOCK(lock, locktype);
4793                 goto find_ns;
4794         }
4795
4796         if (nodep != NULL) {
4797                 new_reference(search.rbtdb, node);
4798                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4799                 *nodep = node;
4800         }
4801
4802         bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
4803         if (foundsig != NULL)
4804                 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4805                               sigrdataset);
4806
4807         if (need_headerupdate(found, search.now) ||
4808             (foundsig != NULL &&  need_headerupdate(foundsig, search.now))) {
4809                 if (locktype != isc_rwlocktype_write) {
4810                         NODE_UNLOCK(lock, locktype);
4811                         NODE_LOCK(lock, isc_rwlocktype_write);
4812                         locktype = isc_rwlocktype_write;
4813                 }
4814                 if (need_headerupdate(found, search.now))
4815                         update_header(search.rbtdb, found, search.now);
4816                 if (foundsig != NULL &&
4817                     need_headerupdate(foundsig, search.now)) {
4818                         update_header(search.rbtdb, foundsig, search.now);
4819                 }
4820         }
4821
4822         NODE_UNLOCK(lock, locktype);
4823
4824  tree_exit:
4825         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4826
4827         INSIST(!search.need_cleanup);
4828
4829         dns_rbtnodechain_reset(&search.chain);
4830
4831         if (result == DNS_R_DELEGATION)
4832                 result = ISC_R_SUCCESS;
4833
4834         return (result);
4835 }
4836
4837 static void
4838 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
4839         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4840         dns_rbtnode_t *node = (dns_rbtnode_t *)source;
4841         unsigned int refs;
4842
4843         REQUIRE(VALID_RBTDB(rbtdb));
4844         REQUIRE(targetp != NULL && *targetp == NULL);
4845
4846         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
4847         dns_rbtnode_refincrement(node, &refs);
4848         INSIST(refs != 0);
4849         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
4850
4851         *targetp = source;
4852 }
4853
4854 static void
4855 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
4856         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4857         dns_rbtnode_t *node;
4858         isc_boolean_t want_free = ISC_FALSE;
4859         isc_boolean_t inactive = ISC_FALSE;
4860         rbtdb_nodelock_t *nodelock;
4861
4862         REQUIRE(VALID_RBTDB(rbtdb));
4863         REQUIRE(targetp != NULL && *targetp != NULL);
4864
4865         node = (dns_rbtnode_t *)(*targetp);
4866         nodelock = &rbtdb->node_locks[node->locknum];
4867
4868         NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
4869
4870         if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
4871                                 isc_rwlocktype_none, ISC_FALSE)) {
4872                 if (isc_refcount_current(&nodelock->references) == 0 &&
4873                     nodelock->exiting) {
4874                         inactive = ISC_TRUE;
4875                 }
4876         }
4877
4878         NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
4879
4880         *targetp = NULL;
4881
4882         if (inactive) {
4883                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
4884                 rbtdb->active--;
4885                 if (rbtdb->active == 0)
4886                         want_free = ISC_TRUE;
4887                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
4888                 if (want_free) {
4889                         char buf[DNS_NAME_FORMATSIZE];
4890                         if (dns_name_dynamic(&rbtdb->common.origin))
4891                                 dns_name_format(&rbtdb->common.origin, buf,
4892                                                 sizeof(buf));
4893                         else
4894                                 strcpy(buf, "<UNKNOWN>");
4895                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
4896                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
4897                                       "calling free_rbtdb(%s)", buf);
4898                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
4899                 }
4900         }
4901 }
4902
4903 static isc_result_t
4904 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
4905         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4906         dns_rbtnode_t *rbtnode = node;
4907         rdatasetheader_t *header;
4908         isc_boolean_t force_expire = ISC_FALSE;
4909         /*
4910          * These are the category and module used by the cache cleaner.
4911          */
4912         isc_boolean_t log = ISC_FALSE;
4913         isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
4914         isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
4915         int level = ISC_LOG_DEBUG(2);
4916         char printname[DNS_NAME_FORMATSIZE];
4917
4918         REQUIRE(VALID_RBTDB(rbtdb));
4919
4920         /*
4921          * Caller must hold a tree lock.
4922          */
4923
4924         if (now == 0)
4925                 isc_stdtime_get(&now);
4926
4927         if (rbtdb->overmem) {
4928                 isc_uint32_t val;
4929
4930                 isc_random_get(&val);
4931                 /*
4932                  * XXXDCL Could stand to have a better policy, like LRU.
4933                  */
4934                 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
4935
4936                 /*
4937                  * Note that 'log' can be true IFF rbtdb->overmem is also true.
4938                  * rbtdb->overmem can currently only be true for cache
4939                  * databases -- hence all of the "overmem cache" log strings.
4940                  */
4941                 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
4942                 if (log)
4943                         isc_log_write(dns_lctx, category, module, level,
4944                                       "overmem cache: %s %s",
4945                                       force_expire ? "FORCE" : "check",
4946                                       dns_rbt_formatnodename(rbtnode,
4947                                                            printname,
4948                                                            sizeof(printname)));
4949         }
4950
4951         /*
4952          * We may not need write access, but this code path is not performance
4953          * sensitive, so it should be okay to always lock as a writer.
4954          */
4955         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4956                   isc_rwlocktype_write);
4957
4958         for (header = rbtnode->data; header != NULL; header = header->next)
4959                 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
4960                         /*
4961                          * We don't check if refcurrent(rbtnode) == 0 and try
4962                          * to free like we do in cache_find(), because
4963                          * refcurrent(rbtnode) must be non-zero.  This is so
4964                          * because 'node' is an argument to the function.
4965                          */
4966                         header->attributes |= RDATASET_ATTR_STALE;
4967                         rbtnode->dirty = 1;
4968                         if (log)
4969                                 isc_log_write(dns_lctx, category, module,
4970                                               level, "overmem cache: stale %s",
4971                                               printname);
4972                 } else if (force_expire) {
4973                         if (! RETAIN(header)) {
4974                                 set_ttl(rbtdb, header, 0);
4975                                 header->attributes |= RDATASET_ATTR_STALE;
4976                                 rbtnode->dirty = 1;
4977                         } else if (log) {
4978                                 isc_log_write(dns_lctx, category, module,
4979                                               level, "overmem cache: "
4980                                               "reprieve by RETAIN() %s",
4981                                               printname);
4982                         }
4983                 } else if (rbtdb->overmem && log)
4984                         isc_log_write(dns_lctx, category, module, level,
4985                                       "overmem cache: saved %s", printname);
4986
4987         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4988                     isc_rwlocktype_write);
4989
4990         return (ISC_R_SUCCESS);
4991 }
4992
4993 static void
4994 overmem(dns_db_t *db, isc_boolean_t overmem) {
4995         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4996
4997         if (IS_CACHE(rbtdb))
4998                 rbtdb->overmem = overmem;
4999 }
5000
5001 static void
5002 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5003         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5004         dns_rbtnode_t *rbtnode = node;
5005         isc_boolean_t first;
5006
5007         REQUIRE(VALID_RBTDB(rbtdb));
5008
5009         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5010                   isc_rwlocktype_read);
5011
5012         fprintf(out, "node %p, %u references, locknum = %u\n",
5013                 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5014                 rbtnode->locknum);
5015         if (rbtnode->data != NULL) {
5016                 rdatasetheader_t *current, *top_next;
5017
5018                 for (current = rbtnode->data; current != NULL;
5019                      current = top_next) {
5020                         top_next = current->next;
5021                         first = ISC_TRUE;
5022                         fprintf(out, "\ttype %u", current->type);
5023                         do {
5024                                 if (!first)
5025                                         fprintf(out, "\t");
5026                                 first = ISC_FALSE;
5027                                 fprintf(out,
5028                                         "\tserial = %lu, ttl = %u, "
5029                                         "trust = %u, attributes = %u, "
5030                                         "resign = %u\n",
5031                                         (unsigned long)current->serial,
5032                                         current->rdh_ttl,
5033                                         current->trust,
5034                                         current->attributes,
5035                                         current->resign);
5036                                 current = current->down;
5037                         } while (current != NULL);
5038                 }
5039         } else
5040                 fprintf(out, "(empty)\n");
5041
5042         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5043                     isc_rwlocktype_read);
5044 }
5045
5046 static isc_result_t
5047 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5048 {
5049         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5050         rbtdb_dbiterator_t *rbtdbiter;
5051
5052         REQUIRE(VALID_RBTDB(rbtdb));
5053
5054         rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5055         if (rbtdbiter == NULL)
5056                 return (ISC_R_NOMEMORY);
5057
5058         rbtdbiter->common.methods = &dbiterator_methods;
5059         rbtdbiter->common.db = NULL;
5060         dns_db_attach(db, &rbtdbiter->common.db);
5061         rbtdbiter->common.relative_names =
5062                         ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5063         rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5064         rbtdbiter->common.cleaning = ISC_FALSE;
5065         rbtdbiter->paused = ISC_TRUE;
5066         rbtdbiter->tree_locked = isc_rwlocktype_none;
5067         rbtdbiter->result = ISC_R_SUCCESS;
5068         dns_fixedname_init(&rbtdbiter->name);
5069         dns_fixedname_init(&rbtdbiter->origin);
5070         rbtdbiter->node = NULL;
5071         rbtdbiter->delete = 0;
5072         rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5073         rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5074         memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5075         dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5076         dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5077         if (rbtdbiter->nsec3only)
5078                 rbtdbiter->current = &rbtdbiter->nsec3chain;
5079         else
5080                 rbtdbiter->current = &rbtdbiter->chain;
5081
5082         *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5083
5084         return (ISC_R_SUCCESS);
5085 }
5086
5087 static isc_result_t
5088 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5089                   dns_rdatatype_t type, dns_rdatatype_t covers,
5090                   isc_stdtime_t now, dns_rdataset_t *rdataset,
5091                   dns_rdataset_t *sigrdataset)
5092 {
5093         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5094         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5095         rdatasetheader_t *header, *header_next, *found, *foundsig;
5096         rbtdb_serial_t serial;
5097         rbtdb_version_t *rbtversion = version;
5098         isc_boolean_t close_version = ISC_FALSE;
5099         rbtdb_rdatatype_t matchtype, sigmatchtype;
5100
5101         REQUIRE(VALID_RBTDB(rbtdb));
5102         REQUIRE(type != dns_rdatatype_any);
5103
5104         if (rbtversion == NULL) {
5105                 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5106                 close_version = ISC_TRUE;
5107         }
5108         serial = rbtversion->serial;
5109         now = 0;
5110
5111         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5112                   isc_rwlocktype_read);
5113
5114         found = NULL;
5115         foundsig = NULL;
5116         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5117         if (covers == 0)
5118                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5119         else
5120                 sigmatchtype = 0;
5121
5122         for (header = rbtnode->data; header != NULL; header = header_next) {
5123                 header_next = header->next;
5124                 do {
5125                         if (header->serial <= serial &&
5126                             !IGNORE(header)) {
5127                                 /*
5128                                  * Is this a "this rdataset doesn't
5129                                  * exist" record?
5130                                  */
5131                                 if (NONEXISTENT(header))
5132                                         header = NULL;
5133                                 break;
5134                         } else
5135                                 header = header->down;
5136                 } while (header != NULL);
5137                 if (header != NULL) {
5138                         /*
5139                          * We have an active, extant rdataset.  If it's a
5140                          * type we're looking for, remember it.
5141                          */
5142                         if (header->type == matchtype) {
5143                                 found = header;
5144                                 if (foundsig != NULL)
5145                                         break;
5146                         } else if (header->type == sigmatchtype) {
5147                                 foundsig = header;
5148                                 if (found != NULL)
5149                                         break;
5150                         }
5151                 }
5152         }
5153         if (found != NULL) {
5154                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5155                 if (foundsig != NULL)
5156                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5157                                       sigrdataset);
5158         }
5159
5160         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5161                     isc_rwlocktype_read);
5162
5163         if (close_version)
5164                 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5165                              ISC_FALSE);
5166
5167         if (found == NULL)
5168                 return (ISC_R_NOTFOUND);
5169
5170         return (ISC_R_SUCCESS);
5171 }
5172
5173 static isc_result_t
5174 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5175                    dns_rdatatype_t type, dns_rdatatype_t covers,
5176                    isc_stdtime_t now, dns_rdataset_t *rdataset,
5177                    dns_rdataset_t *sigrdataset)
5178 {
5179         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5180         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5181         rdatasetheader_t *header, *header_next, *found, *foundsig;
5182         rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5183         isc_result_t result;
5184         nodelock_t *lock;
5185         isc_rwlocktype_t locktype;
5186
5187         REQUIRE(VALID_RBTDB(rbtdb));
5188         REQUIRE(type != dns_rdatatype_any);
5189
5190         UNUSED(version);
5191
5192         result = ISC_R_SUCCESS;
5193
5194         if (now == 0)
5195                 isc_stdtime_get(&now);
5196
5197         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5198         locktype = isc_rwlocktype_read;
5199         NODE_LOCK(lock, locktype);
5200
5201         found = NULL;
5202         foundsig = NULL;
5203         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5204         negtype = RBTDB_RDATATYPE_VALUE(0, type);
5205         if (covers == 0)
5206                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5207         else
5208                 sigmatchtype = 0;
5209
5210         for (header = rbtnode->data; header != NULL; header = header_next) {
5211                 header_next = header->next;
5212                 if (header->rdh_ttl <= now) {
5213                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5214                             (locktype == isc_rwlocktype_write ||
5215                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5216                                 /*
5217                                  * We update the node's status only when we
5218                                  * can get write access.
5219                                  */
5220                                 locktype = isc_rwlocktype_write;
5221
5222                                 /*
5223                                  * We don't check if refcurrent(rbtnode) == 0
5224                                  * and try to free like we do in cache_find(),
5225                                  * because refcurrent(rbtnode) must be
5226                                  * non-zero.  This is so because 'node' is an
5227                                  * argument to the function.
5228                                  */
5229                                 header->attributes |= RDATASET_ATTR_STALE;
5230                                 rbtnode->dirty = 1;
5231                         }
5232                 } else if (EXISTS(header)) {
5233                         if (header->type == matchtype)
5234                                 found = header;
5235                         else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5236                                  header->type == negtype)
5237                                 found = header;
5238                         else if (header->type == sigmatchtype)
5239                                 foundsig = header;
5240                 }
5241         }
5242         if (found != NULL) {
5243                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5244                 if (foundsig != NULL)
5245                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5246                                       sigrdataset);
5247         }
5248
5249         NODE_UNLOCK(lock, locktype);
5250
5251         if (found == NULL)
5252                 return (ISC_R_NOTFOUND);
5253
5254         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
5255                 /*
5256                  * We found a negative cache entry.
5257                  */
5258                 if (NXDOMAIN(found))
5259                         result = DNS_R_NCACHENXDOMAIN;
5260                 else
5261                         result = DNS_R_NCACHENXRRSET;
5262         }
5263
5264         return (result);
5265 }
5266
5267 static isc_result_t
5268 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5269              isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5270 {
5271         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5272         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5273         rbtdb_version_t *rbtversion = version;
5274         rbtdb_rdatasetiter_t *iterator;
5275         unsigned int refs;
5276
5277         REQUIRE(VALID_RBTDB(rbtdb));
5278
5279         iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5280         if (iterator == NULL)
5281                 return (ISC_R_NOMEMORY);
5282
5283         if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5284                 now = 0;
5285                 if (rbtversion == NULL)
5286                         currentversion(db,
5287                                  (dns_dbversion_t **) (void *)(&rbtversion));
5288                 else {
5289                         unsigned int refs;
5290
5291                         isc_refcount_increment(&rbtversion->references,
5292                                                &refs);
5293                         INSIST(refs > 1);
5294                 }
5295         } else {
5296                 if (now == 0)
5297                         isc_stdtime_get(&now);
5298                 rbtversion = NULL;
5299         }
5300
5301         iterator->common.magic = DNS_RDATASETITER_MAGIC;
5302         iterator->common.methods = &rdatasetiter_methods;
5303         iterator->common.db = db;
5304         iterator->common.node = node;
5305         iterator->common.version = (dns_dbversion_t *)rbtversion;
5306         iterator->common.now = now;
5307
5308         NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5309
5310         dns_rbtnode_refincrement(rbtnode, &refs);
5311         INSIST(refs != 0);
5312
5313         iterator->current = NULL;
5314
5315         NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5316
5317         *iteratorp = (dns_rdatasetiter_t *)iterator;
5318
5319         return (ISC_R_SUCCESS);
5320 }
5321
5322 static isc_boolean_t
5323 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5324         rdatasetheader_t *header, *header_next;
5325         isc_boolean_t cname, other_data;
5326         dns_rdatatype_t rdtype;
5327
5328         /*
5329          * The caller must hold the node lock.
5330          */
5331
5332         /*
5333          * Look for CNAME and "other data" rdatasets active in our version.
5334          */
5335         cname = ISC_FALSE;
5336         other_data = ISC_FALSE;
5337         for (header = node->data; header != NULL; header = header_next) {
5338                 header_next = header->next;
5339                 if (header->type == dns_rdatatype_cname) {
5340                         /*
5341                          * Look for an active extant CNAME.
5342                          */
5343                         do {
5344                                 if (header->serial <= serial &&
5345                                     !IGNORE(header)) {
5346                                         /*
5347                                          * Is this a "this rdataset doesn't
5348                                          * exist" record?
5349                                          */
5350                                         if (NONEXISTENT(header))
5351                                                 header = NULL;
5352                                         break;
5353                                 } else
5354                                         header = header->down;
5355                         } while (header != NULL);
5356                         if (header != NULL)
5357                                 cname = ISC_TRUE;
5358                 } else {
5359                         /*
5360                          * Look for active extant "other data".
5361                          *
5362                          * "Other data" is any rdataset whose type is not
5363                          * KEY, NSEC, SIG or RRSIG.
5364                          */
5365                         rdtype = RBTDB_RDATATYPE_BASE(header->type);
5366                         if (rdtype != dns_rdatatype_key &&
5367                             rdtype != dns_rdatatype_sig &&
5368                             rdtype != dns_rdatatype_nsec &&
5369                             rdtype != dns_rdatatype_rrsig) {
5370                                 /*
5371                                  * Is it active and extant?
5372                                  */
5373                                 do {
5374                                         if (header->serial <= serial &&
5375                                             !IGNORE(header)) {
5376                                                 /*
5377                                                  * Is this a "this rdataset
5378                                                  * doesn't exist" record?
5379                                                  */
5380                                                 if (NONEXISTENT(header))
5381                                                         header = NULL;
5382                                                 break;
5383                                         } else
5384                                                 header = header->down;
5385                                 } while (header != NULL);
5386                                 if (header != NULL)
5387                                         other_data = ISC_TRUE;
5388                         }
5389                 }
5390         }
5391
5392         if (cname && other_data)
5393                 return (ISC_TRUE);
5394
5395         return (ISC_FALSE);
5396 }
5397
5398 static isc_result_t
5399 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5400         isc_result_t result;
5401
5402         INSIST(!IS_CACHE(rbtdb));
5403         INSIST(newheader->heap_index == 0);
5404         INSIST(!ISC_LINK_LINKED(newheader, link));
5405
5406         result = isc_heap_insert(rbtdb->heaps[idx], newheader);
5407         return (result);
5408 }
5409
5410 static isc_result_t
5411 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5412     rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5413     dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5414 {
5415         rbtdb_changed_t *changed = NULL;
5416         rdatasetheader_t *topheader, *topheader_prev, *header;
5417         unsigned char *merged;
5418         isc_result_t result;
5419         isc_boolean_t header_nx;
5420         isc_boolean_t newheader_nx;
5421         isc_boolean_t merge;
5422         dns_rdatatype_t rdtype, covers;
5423         rbtdb_rdatatype_t negtype;
5424         dns_trust_t trust;
5425         int idx;
5426
5427         /*
5428          * Add an rdatasetheader_t to a node.
5429          */
5430
5431         /*
5432          * Caller must be holding the node lock.
5433          */
5434
5435         if ((options & DNS_DBADD_MERGE) != 0) {
5436                 REQUIRE(rbtversion != NULL);
5437                 merge = ISC_TRUE;
5438         } else
5439                 merge = ISC_FALSE;
5440
5441         if ((options & DNS_DBADD_FORCE) != 0)
5442                 trust = dns_trust_ultimate;
5443         else
5444                 trust = newheader->trust;
5445
5446         if (rbtversion != NULL && !loading) {
5447                 /*
5448                  * We always add a changed record, even if no changes end up
5449                  * being made to this node, because it's harmless and
5450                  * simplifies the code.
5451                  */
5452                 changed = add_changed(rbtdb, rbtversion, rbtnode);
5453                 if (changed == NULL) {
5454                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5455                         return (ISC_R_NOMEMORY);
5456                 }
5457         }
5458
5459         newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5460         topheader_prev = NULL;
5461
5462         negtype = 0;
5463         if (rbtversion == NULL && !newheader_nx) {
5464                 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5465                 if (rdtype == 0) {
5466                         /*
5467                          * We're adding a negative cache entry.
5468                          */
5469                         covers = RBTDB_RDATATYPE_EXT(newheader->type);
5470                         if (covers == dns_rdatatype_any) {
5471                                 /*
5472                                  * We're adding an negative cache entry
5473                                  * which covers all types (NXDOMAIN,
5474                                  * NODATA(QTYPE=ANY)).
5475                                  *
5476                                  * We make all other data stale so that the
5477                                  * only rdataset that can be found at this
5478                                  * node is the negative cache entry.
5479                                  */
5480                                 for (topheader = rbtnode->data;
5481                                      topheader != NULL;
5482                                      topheader = topheader->next) {
5483                                         set_ttl(rbtdb, topheader, 0);
5484                                         topheader->attributes |=
5485                                                 RDATASET_ATTR_STALE;
5486                                 }
5487                                 rbtnode->dirty = 1;
5488                                 goto find_header;
5489                         }
5490                         negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5491                 } else {
5492                         /*
5493                          * We're adding something that isn't a
5494                          * negative cache entry.  Look for an extant
5495                          * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5496                          * cache entry.
5497                          */
5498                         for (topheader = rbtnode->data;
5499                              topheader != NULL;
5500                              topheader = topheader->next) {
5501                                 if (topheader->type ==
5502                                     RBTDB_RDATATYPE_NCACHEANY)
5503                                         break;
5504                         }
5505                         if (topheader != NULL && EXISTS(topheader) &&
5506                             topheader->rdh_ttl > now) {
5507                                 /*
5508                                  * Found one.
5509                                  */
5510                                 if (trust < topheader->trust) {
5511                                         /*
5512                                          * The NXDOMAIN/NODATA(QTYPE=ANY)
5513                                          * is more trusted.
5514                                          */
5515                                         free_rdataset(rbtdb,
5516                                                       rbtdb->common.mctx,
5517                                                       newheader);
5518                                         if (addedrdataset != NULL)
5519                                                 bind_rdataset(rbtdb, rbtnode,
5520                                                               topheader, now,
5521                                                               addedrdataset);
5522                                         return (DNS_R_UNCHANGED);
5523                                 }
5524                                 /*
5525                                  * The new rdataset is better.  Expire the
5526                                  * NXDOMAIN/NODATA(QTYPE=ANY).
5527                                  */
5528                                 set_ttl(rbtdb, topheader, 0);
5529                                 topheader->attributes |= RDATASET_ATTR_STALE;
5530                                 rbtnode->dirty = 1;
5531                                 topheader = NULL;
5532                                 goto find_header;
5533                         }
5534                         negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5535                 }
5536         }
5537
5538         for (topheader = rbtnode->data;
5539              topheader != NULL;
5540              topheader = topheader->next) {
5541                 if (topheader->type == newheader->type ||
5542                     topheader->type == negtype)
5543                         break;
5544                 topheader_prev = topheader;
5545         }
5546
5547  find_header:
5548         /*
5549          * If header isn't NULL, we've found the right type.  There may be
5550          * IGNORE rdatasets between the top of the chain and the first real
5551          * data.  We skip over them.
5552          */
5553         header = topheader;
5554         while (header != NULL && IGNORE(header))
5555                 header = header->down;
5556         if (header != NULL) {
5557                 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
5558
5559                 /*
5560                  * Deleting an already non-existent rdataset has no effect.
5561                  */
5562                 if (header_nx && newheader_nx) {
5563                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5564                         return (DNS_R_UNCHANGED);
5565                 }
5566
5567                 /*
5568                  * Trying to add an rdataset with lower trust to a cache DB
5569                  * has no effect, provided that the cache data isn't stale.
5570                  */
5571                 if (rbtversion == NULL && trust < header->trust &&
5572                     (header->rdh_ttl > now || header_nx)) {
5573                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5574                         if (addedrdataset != NULL)
5575                                 bind_rdataset(rbtdb, rbtnode, header, now,
5576                                               addedrdataset);
5577                         return (DNS_R_UNCHANGED);
5578                 }
5579
5580                 /*
5581                  * Don't merge if a nonexistent rdataset is involved.
5582                  */
5583                 if (merge && (header_nx || newheader_nx))
5584                         merge = ISC_FALSE;
5585
5586                 /*
5587                  * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
5588                  * that is the union of 'newheader' and 'header'.
5589                  */
5590                 if (merge) {
5591                         unsigned int flags = 0;
5592                         INSIST(rbtversion->serial >= header->serial);
5593                         merged = NULL;
5594                         result = ISC_R_SUCCESS;
5595
5596                         if ((options & DNS_DBADD_EXACT) != 0)
5597                                 flags |= DNS_RDATASLAB_EXACT;
5598                         if ((options & DNS_DBADD_EXACTTTL) != 0 &&
5599                              newheader->rdh_ttl != header->rdh_ttl)
5600                                         result = DNS_R_NOTEXACT;
5601                         else if (newheader->rdh_ttl != header->rdh_ttl)
5602                                 flags |= DNS_RDATASLAB_FORCE;
5603                         if (result == ISC_R_SUCCESS)
5604                                 result = dns_rdataslab_merge(
5605                                              (unsigned char *)header,
5606                                              (unsigned char *)newheader,
5607                                              (unsigned int)(sizeof(*newheader)),
5608                                              rbtdb->common.mctx,
5609                                              rbtdb->common.rdclass,
5610                                              (dns_rdatatype_t)header->type,
5611                                              flags, &merged);
5612                         if (result == ISC_R_SUCCESS) {
5613                                 /*
5614                                  * If 'header' has the same serial number as
5615                                  * we do, we could clean it up now if we knew
5616                                  * that our caller had no references to it.
5617                                  * We don't know this, however, so we leave it
5618                                  * alone.  It will get cleaned up when
5619                                  * clean_zone_node() runs.
5620                                  */
5621                                 free_rdataset(rbtdb, rbtdb->common.mctx,
5622                                               newheader);
5623                                 newheader = (rdatasetheader_t *)merged;
5624                                 if (loading && RESIGN(newheader) &&
5625                                     RESIGN(header) &&
5626                                     header->resign < newheader->resign)
5627                                         newheader->resign = header->resign;
5628                         } else {
5629                                 free_rdataset(rbtdb, rbtdb->common.mctx,
5630                                               newheader);
5631                                 return (result);
5632                         }
5633                 }
5634                 /*
5635                  * Don't replace existing NS, A and AAAA RRsets
5636                  * in the cache if they are already exist.  This
5637                  * prevents named being locked to old servers.
5638                  * Don't lower trust of existing record if the
5639                  * update is forced.
5640                  */
5641                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5642                     header->type == dns_rdatatype_ns &&
5643                     !header_nx && !newheader_nx &&
5644                     header->trust >= newheader->trust &&
5645                     dns_rdataslab_equalx((unsigned char *)header,
5646                                          (unsigned char *)newheader,
5647                                          (unsigned int)(sizeof(*newheader)),
5648                                          rbtdb->common.rdclass,
5649                                          (dns_rdatatype_t)header->type)) {
5650                         /*
5651                          * Honour the new ttl if it is less than the
5652                          * older one.
5653                          */
5654                         if (header->rdh_ttl > newheader->rdh_ttl)
5655                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
5656                         if (header->noqname == NULL &&
5657                             newheader->noqname != NULL) {
5658                                 header->noqname = newheader->noqname;
5659                                 newheader->noqname = NULL;
5660                         }
5661                         if (header->closest == NULL &&
5662                             newheader->closest != NULL) {
5663                                 header->closest = newheader->closest;
5664                                 newheader->closest = NULL;
5665                         }
5666                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5667                         if (addedrdataset != NULL)
5668                                 bind_rdataset(rbtdb, rbtnode, header, now,
5669                                               addedrdataset);
5670                         return (ISC_R_SUCCESS);
5671                 }
5672                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5673                     (header->type == dns_rdatatype_a ||
5674                      header->type == dns_rdatatype_aaaa) &&
5675                     !header_nx && !newheader_nx &&
5676                     header->trust >= newheader->trust &&
5677                     dns_rdataslab_equal((unsigned char *)header,
5678                                         (unsigned char *)newheader,
5679                                         (unsigned int)(sizeof(*newheader)))) {
5680                         /*
5681                          * Honour the new ttl if it is less than the
5682                          * older one.
5683                          */
5684                         if (header->rdh_ttl > newheader->rdh_ttl)
5685                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
5686                         if (header->noqname == NULL &&
5687                             newheader->noqname != NULL) {
5688                                 header->noqname = newheader->noqname;
5689                                 newheader->noqname = NULL;
5690                         }
5691                         if (header->closest == NULL &&
5692                             newheader->closest != NULL) {
5693                                 header->closest = newheader->closest;
5694                                 newheader->closest = NULL;
5695                         }
5696                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5697                         if (addedrdataset != NULL)
5698                                 bind_rdataset(rbtdb, rbtnode, header, now,
5699                                               addedrdataset);
5700                         return (ISC_R_SUCCESS);
5701                 }
5702                 INSIST(rbtversion == NULL ||
5703                        rbtversion->serial >= topheader->serial);
5704                 if (topheader_prev != NULL)
5705                         topheader_prev->next = newheader;
5706                 else
5707                         rbtnode->data = newheader;
5708                 newheader->next = topheader->next;
5709                 if (loading) {
5710                         /*
5711                          * There are no other references to 'header' when
5712                          * loading, so we MAY clean up 'header' now.
5713                          * Since we don't generate changed records when
5714                          * loading, we MUST clean up 'header' now.
5715                          */
5716                         newheader->down = NULL;
5717                         free_rdataset(rbtdb, rbtdb->common.mctx, header);
5718                 } else {
5719                         newheader->down = topheader;
5720                         topheader->next = newheader;
5721                         rbtnode->dirty = 1;
5722                         if (changed != NULL)
5723                                 changed->dirty = ISC_TRUE;
5724                         if (rbtversion == NULL) {
5725                                 set_ttl(rbtdb, header, 0);
5726                                 header->attributes |= RDATASET_ATTR_STALE;
5727                         }
5728                         idx = newheader->node->locknum;
5729                         if (IS_CACHE(rbtdb)) {
5730                                 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5731                                                  newheader, link);
5732                                 /*
5733                                  * XXXMLG We don't check the return value
5734                                  * here.  If it fails, we will not do TTL
5735                                  * based expiry on this node.  However, we
5736                                  * will do it on the LRU side, so memory
5737                                  * will not leak... for long.
5738                                  */
5739                                 isc_heap_insert(rbtdb->heaps[idx], newheader);
5740                         } else if (RESIGN(newheader))
5741                                 resign_insert(rbtdb, idx, newheader);
5742                 }
5743         } else {
5744                 /*
5745                  * No non-IGNORED rdatasets of the given type exist at
5746                  * this node.
5747                  */
5748
5749                 /*
5750                  * If we're trying to delete the type, don't bother.
5751                  */
5752                 if (newheader_nx) {
5753                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5754                         return (DNS_R_UNCHANGED);
5755                 }
5756
5757                 if (topheader != NULL) {
5758                         /*
5759                          * We have an list of rdatasets of the given type,
5760                          * but they're all marked IGNORE.  We simply insert
5761                          * the new rdataset at the head of the list.
5762                          *
5763                          * Ignored rdatasets cannot occur during loading, so
5764                          * we INSIST on it.
5765                          */
5766                         INSIST(!loading);
5767                         INSIST(rbtversion == NULL ||
5768                                rbtversion->serial >= topheader->serial);
5769                         if (topheader_prev != NULL)
5770                                 topheader_prev->next = newheader;
5771                         else
5772                                 rbtnode->data = newheader;
5773                         newheader->next = topheader->next;
5774                         newheader->down = topheader;
5775                         topheader->next = newheader;
5776                         rbtnode->dirty = 1;
5777                         if (changed != NULL)
5778                                 changed->dirty = ISC_TRUE;
5779                 } else {
5780                         /*
5781                          * No rdatasets of the given type exist at the node.
5782                          */
5783                         newheader->next = rbtnode->data;
5784                         newheader->down = NULL;
5785                         rbtnode->data = newheader;
5786                 }
5787                 idx = newheader->node->locknum;
5788                 if (IS_CACHE(rbtdb)) {
5789                         ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5790                                          newheader, link);
5791                         isc_heap_insert(rbtdb->heaps[idx], newheader);
5792                 } else if (RESIGN(newheader)) {
5793                         resign_insert(rbtdb, idx, newheader);
5794                 }
5795         }
5796
5797         /*
5798          * Check if the node now contains CNAME and other data.
5799          */
5800         if (rbtversion != NULL &&
5801             cname_and_other_data(rbtnode, rbtversion->serial))
5802                 return (DNS_R_CNAMEANDOTHER);
5803
5804         if (addedrdataset != NULL)
5805                 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
5806
5807         return (ISC_R_SUCCESS);
5808 }
5809
5810 static inline isc_boolean_t
5811 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
5812                 rbtdb_rdatatype_t type)
5813 {
5814         if (IS_CACHE(rbtdb)) {
5815                 if (type == dns_rdatatype_dname)
5816                         return (ISC_TRUE);
5817                 else
5818                         return (ISC_FALSE);
5819         } else if (type == dns_rdatatype_dname ||
5820                    (type == dns_rdatatype_ns &&
5821                     (node != rbtdb->origin_node || IS_STUB(rbtdb))))
5822                 return (ISC_TRUE);
5823         return (ISC_FALSE);
5824 }
5825
5826 static inline isc_result_t
5827 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5828            dns_rdataset_t *rdataset)
5829 {
5830         struct noqname *noqname;
5831         isc_mem_t *mctx = rbtdb->common.mctx;
5832         dns_name_t name;
5833         dns_rdataset_t neg, negsig;
5834         isc_result_t result;
5835         isc_region_t r;
5836
5837         dns_name_init(&name, NULL);
5838         dns_rdataset_init(&neg);
5839         dns_rdataset_init(&negsig);
5840
5841         result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
5842         RUNTIME_CHECK(result == ISC_R_SUCCESS);
5843
5844         noqname = isc_mem_get(mctx, sizeof(*noqname));
5845         if (noqname == NULL) {
5846                 result = ISC_R_NOMEMORY;
5847                 goto cleanup;
5848         }
5849         dns_name_init(&noqname->name, NULL);
5850         noqname->neg = NULL;
5851         noqname->negsig = NULL;
5852         noqname->type = neg.type;
5853         result = dns_name_dup(&name, mctx, &noqname->name);
5854         if (result != ISC_R_SUCCESS)
5855                 goto cleanup;
5856         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5857         if (result != ISC_R_SUCCESS)
5858                 goto cleanup;
5859         noqname->neg = r.base;
5860         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5861         if (result != ISC_R_SUCCESS)
5862                 goto cleanup;
5863         noqname->negsig = r.base;
5864         dns_rdataset_disassociate(&neg);
5865         dns_rdataset_disassociate(&negsig);
5866         newheader->noqname = noqname;
5867         return (ISC_R_SUCCESS);
5868
5869 cleanup:
5870         dns_rdataset_disassociate(&neg);
5871         dns_rdataset_disassociate(&negsig);
5872         free_noqname(mctx, &noqname);
5873         return(result);
5874 }
5875
5876 static inline isc_result_t
5877 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5878            dns_rdataset_t *rdataset)
5879 {
5880         struct noqname *closest;
5881         isc_mem_t *mctx = rbtdb->common.mctx;
5882         dns_name_t name;
5883         dns_rdataset_t neg, negsig;
5884         isc_result_t result;
5885         isc_region_t r;
5886
5887         dns_name_init(&name, NULL);
5888         dns_rdataset_init(&neg);
5889         dns_rdataset_init(&negsig);
5890
5891         result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
5892         RUNTIME_CHECK(result == ISC_R_SUCCESS);
5893
5894         closest = isc_mem_get(mctx, sizeof(*closest));
5895         if (closest == NULL) {
5896                 result = ISC_R_NOMEMORY;
5897                 goto cleanup;
5898         }
5899         dns_name_init(&closest->name, NULL);
5900         closest->neg = NULL;
5901         closest->negsig = NULL;
5902         closest->type = neg.type;
5903         result = dns_name_dup(&name, mctx, &closest->name);
5904         if (result != ISC_R_SUCCESS)
5905                 goto cleanup;
5906         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5907         if (result != ISC_R_SUCCESS)
5908                 goto cleanup;
5909         closest->neg = r.base;
5910         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5911         if (result != ISC_R_SUCCESS)
5912                 goto cleanup;
5913         closest->negsig = r.base;
5914         dns_rdataset_disassociate(&neg);
5915         dns_rdataset_disassociate(&negsig);
5916         newheader->closest = closest;
5917         return (ISC_R_SUCCESS);
5918
5919  cleanup:
5920         dns_rdataset_disassociate(&neg);
5921         dns_rdataset_disassociate(&negsig);
5922         free_noqname(mctx, &closest);
5923         return(result);
5924 }
5925
5926 static dns_dbmethods_t zone_methods;
5927
5928 static isc_result_t
5929 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5930             isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
5931             dns_rdataset_t *addedrdataset)
5932 {
5933         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5934         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5935         rbtdb_version_t *rbtversion = version;
5936         isc_region_t region;
5937         rdatasetheader_t *newheader;
5938         rdatasetheader_t *header;
5939         isc_result_t result;
5940         isc_boolean_t delegating;
5941         isc_boolean_t tree_locked = ISC_FALSE;
5942
5943         REQUIRE(VALID_RBTDB(rbtdb));
5944
5945         if (rbtdb->common.methods == &zone_methods)
5946                 REQUIRE(((rbtnode->nsec3 &&
5947                           (rdataset->type == dns_rdatatype_nsec3 ||
5948                            rdataset->covers == dns_rdatatype_nsec3)) ||
5949                          (!rbtnode->nsec3 &&
5950                            rdataset->type != dns_rdatatype_nsec3 &&
5951                            rdataset->covers != dns_rdatatype_nsec3)));
5952
5953         if (rbtversion == NULL) {
5954                 if (now == 0)
5955                         isc_stdtime_get(&now);
5956         } else
5957                 now = 0;
5958
5959         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
5960                                             &region,
5961                                             sizeof(rdatasetheader_t));
5962         if (result != ISC_R_SUCCESS)
5963                 return (result);
5964
5965         newheader = (rdatasetheader_t *)region.base;
5966         init_rdataset(rbtdb, newheader);
5967         set_ttl(rbtdb, newheader, rdataset->ttl + now);
5968         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
5969                                                 rdataset->covers);
5970         newheader->attributes = 0;
5971         newheader->noqname = NULL;
5972         newheader->closest = NULL;
5973         newheader->count = init_count++;
5974         newheader->trust = rdataset->trust;
5975         newheader->additional_auth = NULL;
5976         newheader->additional_glue = NULL;
5977         newheader->last_used = now;
5978         newheader->node = rbtnode;
5979         if (rbtversion != NULL) {
5980                 newheader->serial = rbtversion->serial;
5981                 now = 0;
5982
5983                 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
5984                         newheader->attributes |= RDATASET_ATTR_RESIGN;
5985                         newheader->resign = rdataset->resign;
5986                 } else
5987                         newheader->resign = 0;
5988         } else {
5989                 newheader->serial = 1;
5990                 newheader->resign = 0;
5991                 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
5992                         newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
5993                 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
5994                         newheader->attributes |= RDATASET_ATTR_OPTOUT;
5995                 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
5996                         result = addnoqname(rbtdb, newheader, rdataset);
5997                         if (result != ISC_R_SUCCESS) {
5998                                 free_rdataset(rbtdb, rbtdb->common.mctx,
5999                                               newheader);
6000                                 return (result);
6001                         }
6002                 }
6003                 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6004                         result = addclosest(rbtdb, newheader, rdataset);
6005                         if (result != ISC_R_SUCCESS) {
6006                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6007                                               newheader);
6008                                 return (result);
6009                         }
6010                 }
6011         }
6012
6013         /*
6014          * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6015          * just DNAME for the cache), then we need to set the callback bit
6016          * on the node.
6017          */
6018         if (delegating_type(rbtdb, rbtnode, rdataset->type))
6019                 delegating = ISC_TRUE;
6020         else
6021                 delegating = ISC_FALSE;
6022
6023         /*
6024          * If we're adding a delegation type or the DB is a cache in an overmem
6025          * state, hold an exclusive lock on the tree.  In the latter case
6026          * the lock does not necessarily have to be acquired but it will help
6027          * purge stale entries more effectively.
6028          */
6029         if (delegating || (IS_CACHE(rbtdb) && rbtdb->overmem)) {
6030                 tree_locked = ISC_TRUE;
6031                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6032         }
6033
6034         if (IS_CACHE(rbtdb) && rbtdb->overmem)
6035                 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6036
6037         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6038                   isc_rwlocktype_write);
6039
6040         if (rbtdb->rrsetstats != NULL) {
6041                 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6042                 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6043         }
6044
6045         if (IS_CACHE(rbtdb)) {
6046                 if (tree_locked)
6047                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6048
6049                 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6050                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6051                         expire_header(rbtdb, header, tree_locked);
6052
6053                 /*
6054                  * If we've been holding a write lock on the tree just for
6055                  * cleaning, we can release it now.  However, we still need the
6056                  * node lock.
6057                  */
6058                 if (tree_locked && !delegating) {
6059                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6060                         tree_locked = ISC_FALSE;
6061                 }
6062         }
6063
6064         result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
6065                      addedrdataset, now);
6066         if (result == ISC_R_SUCCESS && delegating)
6067                 rbtnode->find_callback = 1;
6068
6069         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6070                     isc_rwlocktype_write);
6071
6072         if (tree_locked)
6073                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6074
6075         /*
6076          * Update the zone's secure status.  If version is non-NULL
6077          * this is deferred until closeversion() is called.
6078          */
6079         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6080                 iszonesecure(db, version, rbtdb->origin_node);
6081
6082         return (result);
6083 }
6084
6085 static isc_result_t
6086 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6087                  dns_rdataset_t *rdataset, unsigned int options,
6088                  dns_rdataset_t *newrdataset)
6089 {
6090         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6091         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6092         rbtdb_version_t *rbtversion = version;
6093         rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6094         unsigned char *subresult;
6095         isc_region_t region;
6096         isc_result_t result;
6097         rbtdb_changed_t *changed;
6098
6099         REQUIRE(VALID_RBTDB(rbtdb));
6100
6101         if (rbtdb->common.methods == &zone_methods)
6102                 REQUIRE(((rbtnode->nsec3 &&
6103                           (rdataset->type == dns_rdatatype_nsec3 ||
6104                            rdataset->covers == dns_rdatatype_nsec3)) ||
6105                          (!rbtnode->nsec3 &&
6106                            rdataset->type != dns_rdatatype_nsec3 &&
6107                            rdataset->covers != dns_rdatatype_nsec3)));
6108
6109         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6110                                             &region,
6111                                             sizeof(rdatasetheader_t));
6112         if (result != ISC_R_SUCCESS)
6113                 return (result);
6114         newheader = (rdatasetheader_t *)region.base;
6115         init_rdataset(rbtdb, newheader);
6116         set_ttl(rbtdb, newheader, rdataset->ttl);
6117         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6118                                                 rdataset->covers);
6119         newheader->attributes = 0;
6120         newheader->serial = rbtversion->serial;
6121         newheader->trust = 0;
6122         newheader->noqname = NULL;
6123         newheader->closest = NULL;
6124         newheader->count = init_count++;
6125         newheader->additional_auth = NULL;
6126         newheader->additional_glue = NULL;
6127         newheader->last_used = 0;
6128         newheader->node = rbtnode;
6129         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6130                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6131                 newheader->resign = rdataset->resign;
6132         } else
6133                 newheader->resign = 0;
6134
6135         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6136                   isc_rwlocktype_write);
6137
6138         changed = add_changed(rbtdb, rbtversion, rbtnode);
6139         if (changed == NULL) {
6140                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6141                 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6142                             isc_rwlocktype_write);
6143                 return (ISC_R_NOMEMORY);
6144         }
6145
6146         topheader_prev = NULL;
6147         for (topheader = rbtnode->data;
6148              topheader != NULL;
6149              topheader = topheader->next) {
6150                 if (topheader->type == newheader->type)
6151                         break;
6152                 topheader_prev = topheader;
6153         }
6154         /*
6155          * If header isn't NULL, we've found the right type.  There may be
6156          * IGNORE rdatasets between the top of the chain and the first real
6157          * data.  We skip over them.
6158          */
6159         header = topheader;
6160         while (header != NULL && IGNORE(header))
6161                 header = header->down;
6162         if (header != NULL && EXISTS(header)) {
6163                 unsigned int flags = 0;
6164                 subresult = NULL;
6165                 result = ISC_R_SUCCESS;
6166                 if ((options & DNS_DBSUB_EXACT) != 0) {
6167                         flags |= DNS_RDATASLAB_EXACT;
6168                         if (newheader->rdh_ttl != header->rdh_ttl)
6169                                 result = DNS_R_NOTEXACT;
6170                 }
6171                 if (result == ISC_R_SUCCESS)
6172                         result = dns_rdataslab_subtract(
6173                                         (unsigned char *)header,
6174                                         (unsigned char *)newheader,
6175                                         (unsigned int)(sizeof(*newheader)),
6176                                         rbtdb->common.mctx,
6177                                         rbtdb->common.rdclass,
6178                                         (dns_rdatatype_t)header->type,
6179                                         flags, &subresult);
6180                 if (result == ISC_R_SUCCESS) {
6181                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6182                         newheader = (rdatasetheader_t *)subresult;
6183                         init_rdataset(rbtdb, newheader);
6184                         /*
6185                          * We have to set the serial since the rdataslab
6186                          * subtraction routine copies the reserved portion of
6187                          * header, not newheader.
6188                          */
6189                         newheader->serial = rbtversion->serial;
6190                         /*
6191                          * XXXJT: dns_rdataslab_subtract() copied the pointers
6192                          * to additional info.  We need to clear these fields
6193                          * to avoid having duplicated references.
6194                          */
6195                         newheader->additional_auth = NULL;
6196                         newheader->additional_glue = NULL;
6197                 } else if (result == DNS_R_NXRRSET) {
6198                         /*
6199                          * This subtraction would remove all of the rdata;
6200                          * add a nonexistent header instead.
6201                          */
6202                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6203                         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6204                         if (newheader == NULL) {
6205                                 result = ISC_R_NOMEMORY;
6206                                 goto unlock;
6207                         }
6208                         set_ttl(rbtdb, newheader, 0);
6209                         newheader->type = topheader->type;
6210                         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6211                         newheader->trust = 0;
6212                         newheader->serial = rbtversion->serial;
6213                         newheader->noqname = NULL;
6214                         newheader->closest = NULL;
6215                         newheader->count = 0;
6216                         newheader->additional_auth = NULL;
6217                         newheader->additional_glue = NULL;
6218                         newheader->node = rbtnode;
6219                         newheader->resign = 0;
6220                         newheader->last_used = 0;
6221                 } else {
6222                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6223                         goto unlock;
6224                 }
6225
6226                 /*
6227                  * If we're here, we want to link newheader in front of
6228                  * topheader.
6229                  */
6230                 INSIST(rbtversion->serial >= topheader->serial);
6231                 if (topheader_prev != NULL)
6232                         topheader_prev->next = newheader;
6233                 else
6234                         rbtnode->data = newheader;
6235                 newheader->next = topheader->next;
6236                 newheader->down = topheader;
6237                 topheader->next = newheader;
6238                 rbtnode->dirty = 1;
6239                 changed->dirty = ISC_TRUE;
6240         } else {
6241                 /*
6242                  * The rdataset doesn't exist, so we don't need to do anything
6243                  * to satisfy the deletion request.
6244                  */
6245                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6246                 if ((options & DNS_DBSUB_EXACT) != 0)
6247                         result = DNS_R_NOTEXACT;
6248                 else
6249                         result = DNS_R_UNCHANGED;
6250         }
6251
6252         if (result == ISC_R_SUCCESS && newrdataset != NULL)
6253                 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6254
6255  unlock:
6256         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6257                     isc_rwlocktype_write);
6258
6259         /*
6260          * Update the zone's secure status.  If version is non-NULL
6261          * this is deferred until closeversion() is called.
6262          */
6263         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6264                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6265
6266         return (result);
6267 }
6268
6269 static isc_result_t
6270 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6271                dns_rdatatype_t type, dns_rdatatype_t covers)
6272 {
6273         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6274         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6275         rbtdb_version_t *rbtversion = version;
6276         isc_result_t result;
6277         rdatasetheader_t *newheader;
6278
6279         REQUIRE(VALID_RBTDB(rbtdb));
6280
6281         if (type == dns_rdatatype_any)
6282                 return (ISC_R_NOTIMPLEMENTED);
6283         if (type == dns_rdatatype_rrsig && covers == 0)
6284                 return (ISC_R_NOTIMPLEMENTED);
6285
6286         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6287         if (newheader == NULL)
6288                 return (ISC_R_NOMEMORY);
6289         set_ttl(rbtdb, newheader, 0);
6290         newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6291         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6292         newheader->trust = 0;
6293         newheader->noqname = NULL;
6294         newheader->closest = NULL;
6295         newheader->additional_auth = NULL;
6296         newheader->additional_glue = NULL;
6297         if (rbtversion != NULL)
6298                 newheader->serial = rbtversion->serial;
6299         else
6300                 newheader->serial = 0;
6301         newheader->count = 0;
6302         newheader->last_used = 0;
6303         newheader->node = rbtnode;
6304
6305         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6306                   isc_rwlocktype_write);
6307
6308         result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6309                      ISC_FALSE, NULL, 0);
6310
6311         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6312                     isc_rwlocktype_write);
6313
6314         /*
6315          * Update the zone's secure status.  If version is non-NULL
6316          * this is deferred until closeversion() is called.
6317          */
6318         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6319                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6320
6321         return (result);
6322 }
6323
6324 static isc_result_t
6325 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6326         rbtdb_load_t *loadctx = arg;
6327         dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6328         dns_rbtnode_t *node;
6329         isc_result_t result;
6330         isc_region_t region;
6331         rdatasetheader_t *newheader;
6332
6333         /*
6334          * This routine does no node locking.  See comments in
6335          * 'load' below for more information on loading and
6336          * locking.
6337          */
6338
6339
6340         /*
6341          * SOA records are only allowed at top of zone.
6342          */
6343         if (rdataset->type == dns_rdatatype_soa &&
6344             !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6345                 return (DNS_R_NOTZONETOP);
6346
6347         if (rdataset->type != dns_rdatatype_nsec3 &&
6348             rdataset->covers != dns_rdatatype_nsec3)
6349                 add_empty_wildcards(rbtdb, name);
6350
6351         if (dns_name_iswildcard(name)) {
6352                 /*
6353                  * NS record owners cannot legally be wild cards.
6354                  */
6355                 if (rdataset->type == dns_rdatatype_ns)
6356                         return (DNS_R_INVALIDNS);
6357                 /*
6358                  * NSEC3 record owners cannot legally be wild cards.
6359                  */
6360                 if (rdataset->type == dns_rdatatype_nsec3)
6361                         return (DNS_R_INVALIDNSEC3);
6362                 result = add_wildcard_magic(rbtdb, name);
6363                 if (result != ISC_R_SUCCESS)
6364                         return (result);
6365         }
6366
6367         node = NULL;
6368         if (rdataset->type == dns_rdatatype_nsec3 ||
6369             rdataset->covers == dns_rdatatype_nsec3) {
6370                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6371                 if (result == ISC_R_SUCCESS)
6372                         node->nsec3 = 1;
6373         } else {
6374                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
6375                 if (result == ISC_R_SUCCESS)
6376                         node->nsec3 = 0;
6377         }
6378         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
6379                 return (result);
6380         if (result != ISC_R_EXISTS) {
6381                 dns_name_t foundname;
6382                 dns_name_init(&foundname, NULL);
6383                 dns_rbt_namefromnode(node, &foundname);
6384 #ifdef DNS_RBT_USEHASH
6385                 node->locknum = node->hashval % rbtdb->node_lock_count;
6386 #else
6387                 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6388                         rbtdb->node_lock_count;
6389 #endif
6390         }
6391
6392         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6393                                             &region,
6394                                             sizeof(rdatasetheader_t));
6395         if (result != ISC_R_SUCCESS)
6396                 return (result);
6397         newheader = (rdatasetheader_t *)region.base;
6398         init_rdataset(rbtdb, newheader);
6399         set_ttl(rbtdb, newheader,
6400                 rdataset->ttl + loadctx->now); /* XXX overflow check */
6401         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6402                                                 rdataset->covers);
6403         newheader->attributes = 0;
6404         newheader->trust = rdataset->trust;
6405         newheader->serial = 1;
6406         newheader->noqname = NULL;
6407         newheader->closest = NULL;
6408         newheader->count = init_count++;
6409         newheader->additional_auth = NULL;
6410         newheader->additional_glue = NULL;
6411         newheader->last_used = 0;
6412         newheader->node = node;
6413         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6414                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6415                 newheader->resign = rdataset->resign;
6416         } else
6417                 newheader->resign = 0;
6418
6419         result = add(rbtdb, node, rbtdb->current_version, newheader,
6420                      DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
6421         if (result == ISC_R_SUCCESS &&
6422             delegating_type(rbtdb, node, rdataset->type))
6423                 node->find_callback = 1;
6424         else if (result == DNS_R_UNCHANGED)
6425                 result = ISC_R_SUCCESS;
6426
6427         return (result);
6428 }
6429
6430 static isc_result_t
6431 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6432         rbtdb_load_t *loadctx;
6433         dns_rbtdb_t *rbtdb;
6434
6435         rbtdb = (dns_rbtdb_t *)db;
6436
6437         REQUIRE(VALID_RBTDB(rbtdb));
6438
6439         loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
6440         if (loadctx == NULL)
6441                 return (ISC_R_NOMEMORY);
6442
6443         loadctx->rbtdb = rbtdb;
6444         if (IS_CACHE(rbtdb))
6445                 isc_stdtime_get(&loadctx->now);
6446         else
6447                 loadctx->now = 0;
6448
6449         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6450
6451         REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
6452                 == 0);
6453         rbtdb->attributes |= RBTDB_ATTR_LOADING;
6454
6455         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6456
6457         *addp = loading_addrdataset;
6458         *dbloadp = loadctx;
6459
6460         return (ISC_R_SUCCESS);
6461 }
6462
6463 static isc_result_t
6464 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
6465         rbtdb_load_t *loadctx;
6466         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6467
6468         REQUIRE(VALID_RBTDB(rbtdb));
6469         REQUIRE(dbloadp != NULL);
6470         loadctx = *dbloadp;
6471         REQUIRE(loadctx->rbtdb == rbtdb);
6472
6473         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6474
6475         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
6476         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
6477
6478         rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
6479         rbtdb->attributes |= RBTDB_ATTR_LOADED;
6480
6481         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6482
6483         /*
6484          * If there's a KEY rdataset at the zone origin containing a
6485          * zone key, we consider the zone secure.
6486          */
6487         if (! IS_CACHE(rbtdb))
6488                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6489
6490         *dbloadp = NULL;
6491
6492         isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
6493
6494         return (ISC_R_SUCCESS);
6495 }
6496
6497 static isc_result_t
6498 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
6499      dns_masterformat_t masterformat) {
6500         dns_rbtdb_t *rbtdb;
6501
6502         rbtdb = (dns_rbtdb_t *)db;
6503
6504         REQUIRE(VALID_RBTDB(rbtdb));
6505
6506         return (dns_master_dump2(rbtdb->common.mctx, db, version,
6507                                  &dns_master_style_default,
6508                                  filename, masterformat));
6509 }
6510
6511 static void
6512 delete_callback(void *data, void *arg) {
6513         dns_rbtdb_t *rbtdb = arg;
6514         rdatasetheader_t *current, *next;
6515         unsigned int locknum;
6516
6517         current = data;
6518         locknum = current->node->locknum;
6519         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6520         while (current != NULL) {
6521                 next = current->next;
6522                 free_rdataset(rbtdb, rbtdb->common.mctx, current);
6523                 current = next;
6524         }
6525         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6526 }
6527
6528 static isc_boolean_t
6529 issecure(dns_db_t *db) {
6530         dns_rbtdb_t *rbtdb;
6531         isc_boolean_t secure;
6532
6533         rbtdb = (dns_rbtdb_t *)db;
6534
6535         REQUIRE(VALID_RBTDB(rbtdb));
6536
6537         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6538         secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
6539         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6540
6541         return (secure);
6542 }
6543
6544 static isc_boolean_t
6545 isdnssec(dns_db_t *db) {
6546         dns_rbtdb_t *rbtdb;
6547         isc_boolean_t dnssec;
6548
6549         rbtdb = (dns_rbtdb_t *)db;
6550
6551         REQUIRE(VALID_RBTDB(rbtdb));
6552
6553         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6554         dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
6555         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6556
6557         return (dnssec);
6558 }
6559
6560 static unsigned int
6561 nodecount(dns_db_t *db) {
6562         dns_rbtdb_t *rbtdb;
6563         unsigned int count;
6564
6565         rbtdb = (dns_rbtdb_t *)db;
6566
6567         REQUIRE(VALID_RBTDB(rbtdb));
6568
6569         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6570         count = dns_rbt_nodecount(rbtdb->tree);
6571         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6572
6573         return (count);
6574 }
6575
6576 static void
6577 settask(dns_db_t *db, isc_task_t *task) {
6578         dns_rbtdb_t *rbtdb;
6579
6580         rbtdb = (dns_rbtdb_t *)db;
6581
6582         REQUIRE(VALID_RBTDB(rbtdb));
6583
6584         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6585         if (rbtdb->task != NULL)
6586                 isc_task_detach(&rbtdb->task);
6587         if (task != NULL)
6588                 isc_task_attach(task, &rbtdb->task);
6589         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6590 }
6591
6592 static isc_boolean_t
6593 ispersistent(dns_db_t *db) {
6594         UNUSED(db);
6595         return (ISC_FALSE);
6596 }
6597
6598 static isc_result_t
6599 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
6600         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6601         dns_rbtnode_t *onode;
6602         isc_result_t result = ISC_R_SUCCESS;
6603
6604         REQUIRE(VALID_RBTDB(rbtdb));
6605         REQUIRE(nodep != NULL && *nodep == NULL);
6606
6607         /* Note that the access to origin_node doesn't require a DB lock */
6608         onode = (dns_rbtnode_t *)rbtdb->origin_node;
6609         if (onode != NULL) {
6610                 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
6611                 new_reference(rbtdb, onode);
6612                 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
6613
6614                 *nodep = rbtdb->origin_node;
6615         } else {
6616                 INSIST(IS_CACHE(rbtdb));
6617                 result = ISC_R_NOTFOUND;
6618         }
6619
6620         return (result);
6621 }
6622
6623 static isc_result_t
6624 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
6625                    isc_uint8_t *flags, isc_uint16_t *iterations,
6626                    unsigned char *salt, size_t *salt_length)
6627 {
6628         dns_rbtdb_t *rbtdb;
6629         isc_result_t result = ISC_R_NOTFOUND;
6630         rbtdb_version_t *rbtversion = version;
6631
6632         rbtdb = (dns_rbtdb_t *)db;
6633
6634         REQUIRE(VALID_RBTDB(rbtdb));
6635
6636         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6637
6638         if (rbtversion == NULL)
6639                 rbtversion = rbtdb->current_version;
6640
6641         if (rbtversion->havensec3) {
6642                 if (hash != NULL)
6643                         *hash = rbtversion->hash;
6644                 if (salt != NULL && salt_length != NULL) {
6645                         REQUIRE(*salt_length >= rbtversion->salt_length);
6646                         memcpy(salt, rbtversion->salt, rbtversion->salt_length);
6647                 }
6648                 if (salt_length != NULL)
6649                         *salt_length = rbtversion->salt_length;
6650                 if (iterations != NULL)
6651                         *iterations = rbtversion->iterations;
6652                 if (flags != NULL)
6653                         *flags = rbtversion->flags;
6654                 result = ISC_R_SUCCESS;
6655         }
6656         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6657
6658         return (result);
6659 }
6660
6661 static isc_result_t
6662 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
6663         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6664         isc_stdtime_t oldresign;
6665         isc_result_t result = ISC_R_SUCCESS;
6666         rdatasetheader_t *header;
6667
6668         REQUIRE(VALID_RBTDB(rbtdb));
6669         REQUIRE(!IS_CACHE(rbtdb));
6670         REQUIRE(rdataset != NULL);
6671
6672         header = rdataset->private3;
6673         header--;
6674
6675         NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6676                   isc_rwlocktype_write);
6677
6678         oldresign = header->resign;
6679         header->resign = resign;
6680         if (header->heap_index != 0) {
6681                 INSIST(RESIGN(header));
6682                 if (resign == 0) {
6683                         isc_heap_delete(rbtdb->heaps[header->node->locknum],
6684                                         header->heap_index);
6685                         header->heap_index = 0;
6686                 } else if (resign < oldresign)
6687                         isc_heap_increased(rbtdb->heaps[header->node->locknum],
6688                                            header->heap_index);
6689                 else
6690                         isc_heap_decreased(rbtdb->heaps[header->node->locknum],
6691                                            header->heap_index);
6692         } else if (resign && header->heap_index == 0) {
6693                 header->attributes |= RDATASET_ATTR_RESIGN;
6694                 result = resign_insert(rbtdb, header->node->locknum, header);
6695         }
6696         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6697                     isc_rwlocktype_write);
6698         return (result);
6699 }
6700
6701 static isc_result_t
6702 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
6703                dns_name_t *foundname)
6704 {
6705         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6706         rdatasetheader_t *header = NULL, *this;
6707         unsigned int i;
6708         isc_result_t result = ISC_R_NOTFOUND;
6709         unsigned int locknum;
6710
6711         REQUIRE(VALID_RBTDB(rbtdb));
6712
6713         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
6714
6715         for (i = 0; i < rbtdb->node_lock_count; i++) {
6716                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
6717                 this = isc_heap_element(rbtdb->heaps[i], 1);
6718                 if (this == NULL) {
6719                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6720                                     isc_rwlocktype_read);
6721                         continue;
6722                 }
6723                 if (header == NULL)
6724                         header = this;
6725                 else if (isc_serial_lt(this->resign, header->resign)) {
6726                         locknum = header->node->locknum;
6727                         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
6728                                     isc_rwlocktype_read);
6729                         header = this;
6730                 } else
6731                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6732                                     isc_rwlocktype_read);
6733         }
6734
6735         if (header == NULL)
6736                 goto unlock;
6737
6738         bind_rdataset(rbtdb, header->node, header, 0, rdataset);
6739
6740         if (foundname != NULL)
6741                 dns_rbt_fullnamefromnode(header->node, foundname);
6742
6743         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6744                     isc_rwlocktype_read);
6745
6746         result = ISC_R_SUCCESS;
6747
6748  unlock:
6749         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
6750
6751         return (result);
6752 }
6753
6754 static void
6755 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
6756 {
6757         rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
6758         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6759         dns_rbtnode_t *node;
6760         rdatasetheader_t *header;
6761
6762         REQUIRE(VALID_RBTDB(rbtdb));
6763         REQUIRE(rdataset != NULL);
6764         REQUIRE(rbtdb->future_version == rbtversion);
6765         REQUIRE(rbtversion->writer);
6766
6767         node = rdataset->private2;
6768         header = rdataset->private3;
6769         header--;
6770
6771         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6772         NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
6773                   isc_rwlocktype_write);
6774         /*
6775          * Delete from heap and save to re-signed list so that it can
6776          * be restored if we backout of this change.
6777          */
6778         new_reference(rbtdb, node);
6779         isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
6780         header->heap_index = 0;
6781         ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
6782
6783         NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
6784                     isc_rwlocktype_write);
6785         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6786 }
6787
6788 static dns_stats_t *
6789 getrrsetstats(dns_db_t *db) {
6790         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6791
6792         REQUIRE(VALID_RBTDB(rbtdb));
6793         REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
6794
6795         return (rbtdb->rrsetstats);
6796 }
6797
6798 static dns_dbmethods_t zone_methods = {
6799         attach,
6800         detach,
6801         beginload,
6802         endload,
6803         dump,
6804         currentversion,
6805         newversion,
6806         attachversion,
6807         closeversion,
6808         findnode,
6809         zone_find,
6810         zone_findzonecut,
6811         attachnode,
6812         detachnode,
6813         expirenode,
6814         printnode,
6815         createiterator,
6816         zone_findrdataset,
6817         allrdatasets,
6818         addrdataset,
6819         subtractrdataset,
6820         deleterdataset,
6821         issecure,
6822         nodecount,
6823         ispersistent,
6824         overmem,
6825         settask,
6826         getoriginnode,
6827         NULL,
6828         getnsec3parameters,
6829         findnsec3node,
6830         setsigningtime,
6831         getsigningtime,
6832         resigned,
6833         isdnssec,
6834         NULL
6835 };
6836
6837 static dns_dbmethods_t cache_methods = {
6838         attach,
6839         detach,
6840         beginload,
6841         endload,
6842         dump,
6843         currentversion,
6844         newversion,
6845         attachversion,
6846         closeversion,
6847         findnode,
6848         cache_find,
6849         cache_findzonecut,
6850         attachnode,
6851         detachnode,
6852         expirenode,
6853         printnode,
6854         createiterator,
6855         cache_findrdataset,
6856         allrdatasets,
6857         addrdataset,
6858         subtractrdataset,
6859         deleterdataset,
6860         issecure,
6861         nodecount,
6862         ispersistent,
6863         overmem,
6864         settask,
6865         getoriginnode,
6866         NULL,
6867         NULL,
6868         NULL,
6869         NULL,
6870         NULL,
6871         NULL,
6872         isdnssec,
6873         getrrsetstats
6874 };
6875
6876 isc_result_t
6877 #ifdef DNS_RBTDB_VERSION64
6878 dns_rbtdb64_create
6879 #else
6880 dns_rbtdb_create
6881 #endif
6882                 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
6883                  dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
6884                  void *driverarg, dns_db_t **dbp)
6885 {
6886         dns_rbtdb_t *rbtdb;
6887         isc_result_t result;
6888         int i;
6889         dns_name_t name;
6890         isc_boolean_t (*sooner)(void *, void *);
6891
6892         /* Keep the compiler happy. */
6893         UNUSED(argc);
6894         UNUSED(argv);
6895         UNUSED(driverarg);
6896
6897         rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
6898         if (rbtdb == NULL)
6899                 return (ISC_R_NOMEMORY);
6900
6901         memset(rbtdb, '\0', sizeof(*rbtdb));
6902         dns_name_init(&rbtdb->common.origin, NULL);
6903         rbtdb->common.attributes = 0;
6904         if (type == dns_dbtype_cache) {
6905                 rbtdb->common.methods = &cache_methods;
6906                 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
6907         } else if (type == dns_dbtype_stub) {
6908                 rbtdb->common.methods = &zone_methods;
6909                 rbtdb->common.attributes |= DNS_DBATTR_STUB;
6910         } else
6911                 rbtdb->common.methods = &zone_methods;
6912         rbtdb->common.rdclass = rdclass;
6913         rbtdb->common.mctx = NULL;
6914
6915         result = RBTDB_INITLOCK(&rbtdb->lock);
6916         if (result != ISC_R_SUCCESS)
6917                 goto cleanup_rbtdb;
6918
6919         result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
6920         if (result != ISC_R_SUCCESS)
6921                 goto cleanup_lock;
6922
6923         /*
6924          * Initialize node_lock_count in a generic way to support future
6925          * extension which allows the user to specify this value on creation.
6926          * Note that when specified for a cache DB it must be larger than 1
6927          * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
6928          */
6929         if (rbtdb->node_lock_count == 0) {
6930                 if (IS_CACHE(rbtdb))
6931                         rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
6932                 else
6933                         rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
6934         } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
6935                 result = ISC_R_RANGE;
6936                 goto cleanup_tree_lock;
6937         }
6938         INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
6939         rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
6940                                         sizeof(rbtdb_nodelock_t));
6941         if (rbtdb->node_locks == NULL) {
6942                 result = ISC_R_NOMEMORY;
6943                 goto cleanup_tree_lock;
6944         }
6945
6946         rbtdb->rrsetstats = NULL;
6947         if (IS_CACHE(rbtdb)) {
6948                 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
6949                 if (result != ISC_R_SUCCESS)
6950                         goto cleanup_node_locks;
6951                 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
6952                                                sizeof(rdatasetheaderlist_t));
6953                 if (rbtdb->rdatasets == NULL) {
6954                         result = ISC_R_NOMEMORY;
6955                         goto cleanup_rrsetstats;
6956                 }
6957                 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6958                         ISC_LIST_INIT(rbtdb->rdatasets[i]);
6959         } else
6960                 rbtdb->rdatasets = NULL;
6961
6962         /*
6963          * Create the heaps.
6964          */
6965         rbtdb->heaps = isc_mem_get(mctx, rbtdb->node_lock_count *
6966                                    sizeof(isc_heap_t *));
6967         if (rbtdb->heaps == NULL) {
6968                 result = ISC_R_NOMEMORY;
6969                 goto cleanup_rdatasets;
6970         }
6971         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6972                 rbtdb->heaps[i] = NULL;
6973         sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
6974         for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
6975                 result = isc_heap_create(mctx, sooner, set_index, 0,
6976                                          &rbtdb->heaps[i]);
6977                 if (result != ISC_R_SUCCESS)
6978                         goto cleanup_heaps;
6979         }
6980
6981         /*
6982          * Create deadnode lists.
6983          */
6984         rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
6985                                        sizeof(rbtnodelist_t));
6986         if (rbtdb->deadnodes == NULL) {
6987                 result = ISC_R_NOMEMORY;
6988                 goto cleanup_heaps;
6989         }
6990         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6991                 ISC_LIST_INIT(rbtdb->deadnodes[i]);
6992
6993         rbtdb->active = rbtdb->node_lock_count;
6994
6995         for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
6996                 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
6997                 if (result == ISC_R_SUCCESS) {
6998                         result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
6999                         if (result != ISC_R_SUCCESS)
7000                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7001                 }
7002                 if (result != ISC_R_SUCCESS) {
7003                         while (i-- > 0) {
7004                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7005                                 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7006                                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7007                         }
7008                         goto cleanup_deadnodes;
7009                 }
7010                 rbtdb->node_locks[i].exiting = ISC_FALSE;
7011         }
7012
7013         /*
7014          * Attach to the mctx.  The database will persist so long as there
7015          * are references to it, and attaching to the mctx ensures that our
7016          * mctx won't disappear out from under us.
7017          */
7018         isc_mem_attach(mctx, &rbtdb->common.mctx);
7019
7020         /*
7021          * Must be initialized before free_rbtdb() is called.
7022          */
7023         isc_ondestroy_init(&rbtdb->common.ondest);
7024
7025         /*
7026          * Make a copy of the origin name.
7027          */
7028         result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7029         if (result != ISC_R_SUCCESS) {
7030                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7031                 return (result);
7032         }
7033
7034         /*
7035          * Make the Red-Black Trees.
7036          */
7037         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7038         if (result != ISC_R_SUCCESS) {
7039                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7040                 return (result);
7041         }
7042
7043         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7044         if (result != ISC_R_SUCCESS) {
7045                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7046                 return (result);
7047         }
7048
7049         /*
7050          * In order to set the node callback bit correctly in zone databases,
7051          * we need to know if the node has the origin name of the zone.
7052          * In loading_addrdataset() we could simply compare the new name
7053          * to the origin name, but this is expensive.  Also, we don't know the
7054          * node name in addrdataset(), so we need another way of knowing the
7055          * zone's top.
7056          *
7057          * We now explicitly create a node for the zone's origin, and then
7058          * we simply remember the node's address.  This is safe, because
7059          * the top-of-zone node can never be deleted, nor can its address
7060          * change.
7061          */
7062         if (!IS_CACHE(rbtdb)) {
7063                 rbtdb->origin_node = NULL;
7064                 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7065                                          &rbtdb->origin_node);
7066                 if (result != ISC_R_SUCCESS) {
7067                         INSIST(result != ISC_R_EXISTS);
7068                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7069                         return (result);
7070                 }
7071                 rbtdb->origin_node->nsec3 = 0;
7072                 /*
7073                  * We need to give the origin node the right locknum.
7074                  */
7075                 dns_name_init(&name, NULL);
7076                 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7077 #ifdef DNS_RBT_USEHASH
7078                 rbtdb->origin_node->locknum =
7079                         rbtdb->origin_node->hashval %
7080                         rbtdb->node_lock_count;
7081 #else
7082                 rbtdb->origin_node->locknum =
7083                         dns_name_hash(&name, ISC_TRUE) %
7084                         rbtdb->node_lock_count;
7085 #endif
7086         }
7087
7088         /*
7089          * Misc. Initialization.
7090          */
7091         result = isc_refcount_init(&rbtdb->references, 1);
7092         if (result != ISC_R_SUCCESS) {
7093                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7094                 return (result);
7095         }
7096         rbtdb->attributes = 0;
7097         rbtdb->overmem = ISC_FALSE;
7098         rbtdb->task = NULL;
7099
7100         /*
7101          * Version Initialization.
7102          */
7103         rbtdb->current_serial = 1;
7104         rbtdb->least_serial = 1;
7105         rbtdb->next_serial = 2;
7106         rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7107         if (rbtdb->current_version == NULL) {
7108                 isc_refcount_decrement(&rbtdb->references, NULL);
7109                 isc_refcount_destroy(&rbtdb->references);
7110                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7111                 return (ISC_R_NOMEMORY);
7112         }
7113         rbtdb->current_version->secure = dns_db_insecure;
7114         rbtdb->current_version->havensec3 = ISC_FALSE;
7115         rbtdb->current_version->flags = 0;
7116         rbtdb->current_version->iterations = 0;
7117         rbtdb->current_version->hash = 0;
7118         rbtdb->current_version->salt_length = 0;
7119         memset(rbtdb->current_version->salt, 0,
7120                sizeof(rbtdb->current_version->salt));
7121         rbtdb->future_version = NULL;
7122         ISC_LIST_INIT(rbtdb->open_versions);
7123         /*
7124          * Keep the current version in the open list so that list operation
7125          * won't happen in normal lookup operations.
7126          */
7127         PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7128
7129         rbtdb->common.magic = DNS_DB_MAGIC;
7130         rbtdb->common.impmagic = RBTDB_MAGIC;
7131
7132         *dbp = (dns_db_t *)rbtdb;
7133
7134         return (ISC_R_SUCCESS);
7135
7136  cleanup_deadnodes:
7137         isc_mem_put(mctx, rbtdb->deadnodes,
7138                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7139
7140  cleanup_heaps:
7141         if (rbtdb->heaps != NULL) {
7142                 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7143                         if (rbtdb->heaps[i] != NULL)
7144                                 isc_heap_destroy(&rbtdb->heaps[i]);
7145                 isc_mem_put(mctx, rbtdb->heaps,
7146                             rbtdb->node_lock_count * sizeof(isc_heap_t *));
7147         }
7148
7149  cleanup_rdatasets:
7150         if (rbtdb->rdatasets != NULL)
7151                 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7152                             sizeof(rdatasetheaderlist_t));
7153  cleanup_rrsetstats:
7154         if (rbtdb->rrsetstats != NULL)
7155                 dns_stats_detach(&rbtdb->rrsetstats);
7156
7157  cleanup_node_locks:
7158         isc_mem_put(mctx, rbtdb->node_locks,
7159                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7160
7161  cleanup_tree_lock:
7162         isc_rwlock_destroy(&rbtdb->tree_lock);
7163
7164  cleanup_lock:
7165         RBTDB_DESTROYLOCK(&rbtdb->lock);
7166
7167  cleanup_rbtdb:
7168         isc_mem_put(mctx, rbtdb,  sizeof(*rbtdb));
7169         return (result);
7170 }
7171
7172
7173 /*
7174  * Slabbed Rdataset Methods
7175  */
7176
7177 static void
7178 rdataset_disassociate(dns_rdataset_t *rdataset) {
7179         dns_db_t *db = rdataset->private1;
7180         dns_dbnode_t *node = rdataset->private2;
7181
7182         detachnode(db, &node);
7183 }
7184
7185 static isc_result_t
7186 rdataset_first(dns_rdataset_t *rdataset) {
7187         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7188         unsigned int count;
7189
7190         count = raw[0] * 256 + raw[1];
7191         if (count == 0) {
7192                 rdataset->private5 = NULL;
7193                 return (ISC_R_NOMORE);
7194         }
7195
7196 #if DNS_RDATASET_FIXED
7197         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7198                 raw += 2 + (4 * count);
7199         else
7200 #endif
7201                 raw += 2;
7202
7203         /*
7204          * The privateuint4 field is the number of rdata beyond the
7205          * cursor position, so we decrement the total count by one
7206          * before storing it.
7207          *
7208          * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7209          * first record.  If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7210          * to the first entry in the offset table.
7211          */
7212         count--;
7213         rdataset->privateuint4 = count;
7214         rdataset->private5 = raw;
7215
7216         return (ISC_R_SUCCESS);
7217 }
7218
7219 static isc_result_t
7220 rdataset_next(dns_rdataset_t *rdataset) {
7221         unsigned int count;
7222         unsigned int length;
7223         unsigned char *raw;     /* RDATASLAB */
7224
7225         count = rdataset->privateuint4;
7226         if (count == 0)
7227                 return (ISC_R_NOMORE);
7228         count--;
7229         rdataset->privateuint4 = count;
7230
7231         /*
7232          * Skip forward one record (length + 4) or one offset (4).
7233          */
7234         raw = rdataset->private5;
7235 #if DNS_RDATASET_FIXED
7236         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7237 #endif
7238                 length = raw[0] * 256 + raw[1];
7239                 raw += length;
7240 #if DNS_RDATASET_FIXED
7241         }
7242         rdataset->private5 = raw + 4;           /* length(2) + order(2) */
7243 #else
7244         rdataset->private5 = raw + 2;           /* length(2) */
7245 #endif
7246
7247         return (ISC_R_SUCCESS);
7248 }
7249
7250 static void
7251 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7252         unsigned char *raw = rdataset->private5;        /* RDATASLAB */
7253 #if DNS_RDATASET_FIXED
7254         unsigned int offset;
7255 #endif
7256         unsigned int length;
7257         isc_region_t r;
7258         unsigned int flags = 0;
7259
7260         REQUIRE(raw != NULL);
7261
7262         /*
7263          * Find the start of the record if not already in private5
7264          * then skip the length and order fields.
7265          */
7266 #if DNS_RDATASET_FIXED
7267         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7268                 offset = (raw[0] << 24) + (raw[1] << 16) +
7269                          (raw[2] << 8) + raw[3];
7270                 raw = rdataset->private3;
7271                 raw += offset;
7272         }
7273 #endif
7274         length = raw[0] * 256 + raw[1];
7275 #if DNS_RDATASET_FIXED
7276         raw += 4;
7277 #else
7278         raw += 2;
7279 #endif
7280         if (rdataset->type == dns_rdatatype_rrsig) {
7281                 if (*raw & DNS_RDATASLAB_OFFLINE)
7282                         flags |= DNS_RDATA_OFFLINE;
7283                 length--;
7284                 raw++;
7285         }
7286         r.length = length;
7287         r.base = raw;
7288         dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7289         rdata->flags |= flags;
7290 }
7291
7292 static void
7293 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7294         dns_db_t *db = source->private1;
7295         dns_dbnode_t *node = source->private2;
7296         dns_dbnode_t *cloned_node = NULL;
7297
7298         attachnode(db, node, &cloned_node);
7299         *target = *source;
7300
7301         /*
7302          * Reset iterator state.
7303          */
7304         target->privateuint4 = 0;
7305         target->private5 = NULL;
7306 }
7307
7308 static unsigned int
7309 rdataset_count(dns_rdataset_t *rdataset) {
7310         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7311         unsigned int count;
7312
7313         count = raw[0] * 256 + raw[1];
7314
7315         return (count);
7316 }
7317
7318 static isc_result_t
7319 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7320                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7321 {
7322         dns_db_t *db = rdataset->private1;
7323         dns_dbnode_t *node = rdataset->private2;
7324         dns_dbnode_t *cloned_node;
7325         struct noqname *noqname = rdataset->private6;
7326
7327         cloned_node = NULL;
7328         attachnode(db, node, &cloned_node);
7329         nsec->methods = &rdataset_methods;
7330         nsec->rdclass = db->rdclass;
7331         nsec->type = noqname->type;
7332         nsec->covers = 0;
7333         nsec->ttl = rdataset->ttl;
7334         nsec->trust = rdataset->trust;
7335         nsec->private1 = rdataset->private1;
7336         nsec->private2 = rdataset->private2;
7337         nsec->private3 = noqname->neg;
7338         nsec->privateuint4 = 0;
7339         nsec->private5 = NULL;
7340         nsec->private6 = NULL;
7341         nsec->private7 = NULL;
7342
7343         cloned_node = NULL;
7344         attachnode(db, node, &cloned_node);
7345         nsecsig->methods = &rdataset_methods;
7346         nsecsig->rdclass = db->rdclass;
7347         nsecsig->type = dns_rdatatype_rrsig;
7348         nsecsig->covers = noqname->type;
7349         nsecsig->ttl = rdataset->ttl;
7350         nsecsig->trust = rdataset->trust;
7351         nsecsig->private1 = rdataset->private1;
7352         nsecsig->private2 = rdataset->private2;
7353         nsecsig->private3 = noqname->negsig;
7354         nsecsig->privateuint4 = 0;
7355         nsecsig->private5 = NULL;
7356         nsec->private6 = NULL;
7357         nsec->private7 = NULL;
7358
7359         dns_name_clone(&noqname->name, name);
7360
7361         return (ISC_R_SUCCESS);
7362 }
7363
7364 static isc_result_t
7365 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
7366                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7367 {
7368         dns_db_t *db = rdataset->private1;
7369         dns_dbnode_t *node = rdataset->private2;
7370         dns_dbnode_t *cloned_node;
7371         struct noqname *closest = rdataset->private7;
7372
7373         cloned_node = NULL;
7374         attachnode(db, node, &cloned_node);
7375         nsec->methods = &rdataset_methods;
7376         nsec->rdclass = db->rdclass;
7377         nsec->type = closest->type;
7378         nsec->covers = 0;
7379         nsec->ttl = rdataset->ttl;
7380         nsec->trust = rdataset->trust;
7381         nsec->private1 = rdataset->private1;
7382         nsec->private2 = rdataset->private2;
7383         nsec->private3 = closest->neg;
7384         nsec->privateuint4 = 0;
7385         nsec->private5 = NULL;
7386         nsec->private6 = NULL;
7387         nsec->private7 = NULL;
7388
7389         cloned_node = NULL;
7390         attachnode(db, node, &cloned_node);
7391         nsecsig->methods = &rdataset_methods;
7392         nsecsig->rdclass = db->rdclass;
7393         nsecsig->type = dns_rdatatype_rrsig;
7394         nsecsig->covers = closest->type;
7395         nsecsig->ttl = rdataset->ttl;
7396         nsecsig->trust = rdataset->trust;
7397         nsecsig->private1 = rdataset->private1;
7398         nsecsig->private2 = rdataset->private2;
7399         nsecsig->private3 = closest->negsig;
7400         nsecsig->privateuint4 = 0;
7401         nsecsig->private5 = NULL;
7402         nsec->private6 = NULL;
7403         nsec->private7 = NULL;
7404
7405         dns_name_clone(&closest->name, name);
7406
7407         return (ISC_R_SUCCESS);
7408 }
7409
7410 /*
7411  * Rdataset Iterator Methods
7412  */
7413
7414 static void
7415 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
7416         rbtdb_rdatasetiter_t *rbtiterator;
7417
7418         rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
7419
7420         if (rbtiterator->common.version != NULL)
7421                 closeversion(rbtiterator->common.db,
7422                              &rbtiterator->common.version, ISC_FALSE);
7423         detachnode(rbtiterator->common.db, &rbtiterator->common.node);
7424         isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
7425                     sizeof(*rbtiterator));
7426
7427         *iteratorp = NULL;
7428 }
7429
7430 static isc_result_t
7431 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
7432         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7433         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7434         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7435         rbtdb_version_t *rbtversion = rbtiterator->common.version;
7436         rdatasetheader_t *header, *top_next;
7437         rbtdb_serial_t serial;
7438         isc_stdtime_t now;
7439
7440         if (IS_CACHE(rbtdb)) {
7441                 serial = 1;
7442                 now = rbtiterator->common.now;
7443         } else {
7444                 serial = rbtversion->serial;
7445                 now = 0;
7446         }
7447
7448         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7449                   isc_rwlocktype_read);
7450
7451         for (header = rbtnode->data; header != NULL; header = top_next) {
7452                 top_next = header->next;
7453                 do {
7454                         if (header->serial <= serial && !IGNORE(header)) {
7455                                 /*
7456                                  * Is this a "this rdataset doesn't exist"
7457                                  * record?  Or is it too old in the cache?
7458                                  *
7459                                  * Note: unlike everywhere else, we
7460                                  * check for now > header->rdh_ttl instead
7461                                  * of now >= header->rdh_ttl.  This allows
7462                                  * ANY and RRSIG queries for 0 TTL
7463                                  * rdatasets to work.
7464                                  */
7465                                 if (NONEXISTENT(header) ||
7466                                     (now != 0 && now > header->rdh_ttl))
7467                                         header = NULL;
7468                                 break;
7469                         } else
7470                                 header = header->down;
7471                 } while (header != NULL);
7472                 if (header != NULL)
7473                         break;
7474         }
7475
7476         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7477                     isc_rwlocktype_read);
7478
7479         rbtiterator->current = header;
7480
7481         if (header == NULL)
7482                 return (ISC_R_NOMORE);
7483
7484         return (ISC_R_SUCCESS);
7485 }
7486
7487 static isc_result_t
7488 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
7489         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7490         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7491         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7492         rbtdb_version_t *rbtversion = rbtiterator->common.version;
7493         rdatasetheader_t *header, *top_next;
7494         rbtdb_serial_t serial;
7495         isc_stdtime_t now;
7496         rbtdb_rdatatype_t type, negtype;
7497         dns_rdatatype_t rdtype, covers;
7498
7499         header = rbtiterator->current;
7500         if (header == NULL)
7501                 return (ISC_R_NOMORE);
7502
7503         if (IS_CACHE(rbtdb)) {
7504                 serial = 1;
7505                 now = rbtiterator->common.now;
7506         } else {
7507                 serial = rbtversion->serial;
7508                 now = 0;
7509         }
7510
7511         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7512                   isc_rwlocktype_read);
7513
7514         type = header->type;
7515         rdtype = RBTDB_RDATATYPE_BASE(header->type);
7516         if (rdtype == 0) {
7517                 covers = RBTDB_RDATATYPE_EXT(header->type);
7518                 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
7519         } else
7520                 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
7521         for (header = header->next; header != NULL; header = top_next) {
7522                 top_next = header->next;
7523                 /*
7524                  * If not walking back up the down list.
7525                  */
7526                 if (header->type != type && header->type != negtype) {
7527                         do {
7528                                 if (header->serial <= serial &&
7529                                     !IGNORE(header)) {
7530                                         /*
7531                                          * Is this a "this rdataset doesn't
7532                                          * exist" record?
7533                                          *
7534                                          * Note: unlike everywhere else, we
7535                                          * check for now > header->ttl instead
7536                                          * of now >= header->ttl.  This allows
7537                                          * ANY and RRSIG queries for 0 TTL
7538                                          * rdatasets to work.
7539                                          */
7540                                         if ((header->attributes &
7541                                              RDATASET_ATTR_NONEXISTENT) != 0 ||
7542                                             (now != 0 && now > header->rdh_ttl))
7543                                                 header = NULL;
7544                                         break;
7545                                 } else
7546                                         header = header->down;
7547                         } while (header != NULL);
7548                         if (header != NULL)
7549                                 break;
7550                 }
7551         }
7552
7553         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7554                     isc_rwlocktype_read);
7555
7556         rbtiterator->current = header;
7557
7558         if (header == NULL)
7559                 return (ISC_R_NOMORE);
7560
7561         return (ISC_R_SUCCESS);
7562 }
7563
7564 static void
7565 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
7566         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7567         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7568         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7569         rdatasetheader_t *header;
7570
7571         header = rbtiterator->current;
7572         REQUIRE(header != NULL);
7573
7574         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7575                   isc_rwlocktype_read);
7576
7577         bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
7578                       rdataset);
7579
7580         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7581                     isc_rwlocktype_read);
7582 }
7583
7584
7585 /*
7586  * Database Iterator Methods
7587  */
7588
7589 static inline void
7590 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7591         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7592         dns_rbtnode_t *node = rbtdbiter->node;
7593
7594         if (node == NULL)
7595                 return;
7596
7597         INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
7598         reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
7599 }
7600
7601 static inline void
7602 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7603         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7604         dns_rbtnode_t *node = rbtdbiter->node;
7605         nodelock_t *lock;
7606
7607         if (node == NULL)
7608                 return;
7609
7610         lock = &rbtdb->node_locks[node->locknum].lock;
7611         NODE_LOCK(lock, isc_rwlocktype_read);
7612         decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
7613                             rbtdbiter->tree_locked, ISC_FALSE);
7614         NODE_UNLOCK(lock, isc_rwlocktype_read);
7615
7616         rbtdbiter->node = NULL;
7617 }
7618
7619 static void
7620 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
7621         dns_rbtnode_t *node;
7622         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7623         isc_boolean_t was_read_locked = ISC_FALSE;
7624         nodelock_t *lock;
7625         int i;
7626
7627         if (rbtdbiter->delete != 0) {
7628                 /*
7629                  * Note that "%d node of %d in tree" can report things like
7630                  * "flush_deletions: 59 nodes of 41 in tree".  This means
7631                  * That some nodes appear on the deletions list more than
7632                  * once.  Only the last occurence will actually be deleted.
7633                  */
7634                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
7635                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
7636                               "flush_deletions: %d nodes of %d in tree",
7637                               rbtdbiter->delete,
7638                               dns_rbt_nodecount(rbtdb->tree));
7639
7640                 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7641                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7642                         was_read_locked = ISC_TRUE;
7643                 }
7644                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7645                 rbtdbiter->tree_locked = isc_rwlocktype_write;
7646
7647                 for (i = 0; i < rbtdbiter->delete; i++) {
7648                         node = rbtdbiter->deletions[i];
7649                         lock = &rbtdb->node_locks[node->locknum].lock;
7650
7651                         NODE_LOCK(lock, isc_rwlocktype_read);
7652                         decrement_reference(rbtdb, node, 0,
7653                                             isc_rwlocktype_read,
7654                                             rbtdbiter->tree_locked, ISC_FALSE);
7655                         NODE_UNLOCK(lock, isc_rwlocktype_read);
7656                 }
7657
7658                 rbtdbiter->delete = 0;
7659
7660                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7661                 if (was_read_locked) {
7662                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7663                         rbtdbiter->tree_locked = isc_rwlocktype_read;
7664
7665                 } else {
7666                         rbtdbiter->tree_locked = isc_rwlocktype_none;
7667                 }
7668         }
7669 }
7670
7671 static inline void
7672 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
7673         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7674
7675         REQUIRE(rbtdbiter->paused);
7676         REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
7677
7678         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7679         rbtdbiter->tree_locked = isc_rwlocktype_read;
7680
7681         rbtdbiter->paused = ISC_FALSE;
7682 }
7683
7684 static void
7685 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
7686         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
7687         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7688         dns_db_t *db = NULL;
7689
7690         if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7691                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7692                 rbtdbiter->tree_locked = isc_rwlocktype_none;
7693         } else
7694                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
7695
7696         dereference_iter_node(rbtdbiter);
7697
7698         flush_deletions(rbtdbiter);
7699
7700         dns_db_attach(rbtdbiter->common.db, &db);
7701         dns_db_detach(&rbtdbiter->common.db);
7702
7703         dns_rbtnodechain_reset(&rbtdbiter->chain);
7704         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7705         isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
7706         dns_db_detach(&db);
7707
7708         *iteratorp = NULL;
7709 }
7710
7711 static isc_result_t
7712 dbiterator_first(dns_dbiterator_t *iterator) {
7713         isc_result_t result;
7714         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7715         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7716         dns_name_t *name, *origin;
7717
7718         if (rbtdbiter->result != ISC_R_SUCCESS &&
7719             rbtdbiter->result != ISC_R_NOMORE)
7720                 return (rbtdbiter->result);
7721
7722         if (rbtdbiter->paused)
7723                 resume_iteration(rbtdbiter);
7724
7725         dereference_iter_node(rbtdbiter);
7726
7727         name = dns_fixedname_name(&rbtdbiter->name);
7728         origin = dns_fixedname_name(&rbtdbiter->origin);
7729         dns_rbtnodechain_reset(&rbtdbiter->chain);
7730         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7731
7732         if (rbtdbiter->nsec3only) {
7733                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7734                 result = dns_rbtnodechain_first(rbtdbiter->current,
7735                                                 rbtdb->nsec3, name, origin);
7736         } else {
7737                 rbtdbiter->current = &rbtdbiter->chain;
7738                 result = dns_rbtnodechain_first(rbtdbiter->current,
7739                                                 rbtdb->tree, name, origin);
7740                 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
7741                         rbtdbiter->current = &rbtdbiter->nsec3chain;
7742                         result = dns_rbtnodechain_first(rbtdbiter->current,
7743                                                         rbtdb->nsec3, name,
7744                                                         origin);
7745                 }
7746         }
7747         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7748                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7749                                                   NULL, &rbtdbiter->node);
7750                 if (result == ISC_R_SUCCESS) {
7751                         rbtdbiter->new_origin = ISC_TRUE;
7752                         reference_iter_node(rbtdbiter);
7753                 }
7754         } else {
7755                 INSIST(result == ISC_R_NOTFOUND);
7756                 result = ISC_R_NOMORE; /* The tree is empty. */
7757         }
7758
7759         rbtdbiter->result = result;
7760
7761         return (result);
7762 }
7763
7764 static isc_result_t
7765 dbiterator_last(dns_dbiterator_t *iterator) {
7766         isc_result_t result;
7767         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7768         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7769         dns_name_t *name, *origin;
7770
7771         if (rbtdbiter->result != ISC_R_SUCCESS &&
7772             rbtdbiter->result != ISC_R_NOMORE)
7773                 return (rbtdbiter->result);
7774
7775         if (rbtdbiter->paused)
7776                 resume_iteration(rbtdbiter);
7777
7778         dereference_iter_node(rbtdbiter);
7779
7780         name = dns_fixedname_name(&rbtdbiter->name);
7781         origin = dns_fixedname_name(&rbtdbiter->origin);
7782         dns_rbtnodechain_reset(&rbtdbiter->chain);
7783         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7784
7785         result = ISC_R_NOTFOUND;
7786         if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
7787                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7788                 result = dns_rbtnodechain_last(rbtdbiter->current,
7789                                                rbtdb->nsec3, name, origin);
7790         }
7791         if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
7792                 rbtdbiter->current = &rbtdbiter->chain;
7793                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7794                                                name, origin);
7795         }
7796         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7797                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7798                                                   NULL, &rbtdbiter->node);
7799                 if (result == ISC_R_SUCCESS) {
7800                         rbtdbiter->new_origin = ISC_TRUE;
7801                         reference_iter_node(rbtdbiter);
7802                 }
7803         } else {
7804                 INSIST(result == ISC_R_NOTFOUND);
7805                 result = ISC_R_NOMORE; /* The tree is empty. */
7806         }
7807
7808         rbtdbiter->result = result;
7809
7810         return (result);
7811 }
7812
7813 static isc_result_t
7814 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
7815         isc_result_t result;
7816         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7817         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7818         dns_name_t *iname, *origin;
7819
7820         if (rbtdbiter->result != ISC_R_SUCCESS &&
7821             rbtdbiter->result != ISC_R_NOTFOUND &&
7822             rbtdbiter->result != ISC_R_NOMORE)
7823                 return (rbtdbiter->result);
7824
7825         if (rbtdbiter->paused)
7826                 resume_iteration(rbtdbiter);
7827
7828         dereference_iter_node(rbtdbiter);
7829
7830         iname = dns_fixedname_name(&rbtdbiter->name);
7831         origin = dns_fixedname_name(&rbtdbiter->origin);
7832         dns_rbtnodechain_reset(&rbtdbiter->chain);
7833         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7834
7835         if (rbtdbiter->nsec3only) {
7836                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7837                 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7838                                           &rbtdbiter->node,
7839                                           rbtdbiter->current,
7840                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7841         } else if (rbtdbiter->nonsec3) {
7842                 rbtdbiter->current = &rbtdbiter->chain;
7843                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7844                                           &rbtdbiter->node,
7845                                           rbtdbiter->current,
7846                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7847         } else {
7848                 /*
7849                  * Stay on main chain if not found on either chain.
7850                  */
7851                 rbtdbiter->current = &rbtdbiter->chain;
7852                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7853                                           &rbtdbiter->node,
7854                                           rbtdbiter->current,
7855                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7856                 if (result == DNS_R_PARTIALMATCH) {
7857                         dns_rbtnode_t *node = NULL;
7858                         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7859                                                   &node, &rbtdbiter->nsec3chain,
7860                                                   DNS_RBTFIND_EMPTYDATA,
7861                                                   NULL, NULL);
7862                         if (result == ISC_R_SUCCESS) {
7863                                 rbtdbiter->node = node;
7864                                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7865                         }
7866                 }
7867         }
7868
7869 #if 1
7870         if (result == ISC_R_SUCCESS) {
7871                 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
7872                                                   origin, NULL);
7873                 if (result == ISC_R_SUCCESS) {
7874                         rbtdbiter->new_origin = ISC_TRUE;
7875                         reference_iter_node(rbtdbiter);
7876                 }
7877         } else if (result == DNS_R_PARTIALMATCH) {
7878                 result = ISC_R_NOTFOUND;
7879                 rbtdbiter->node = NULL;
7880         }
7881
7882         rbtdbiter->result = result;
7883 #else
7884         if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
7885                 isc_result_t tresult;
7886                 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
7887                                                    origin, NULL);
7888                 if (tresult == ISC_R_SUCCESS) {
7889                         rbtdbiter->new_origin = ISC_TRUE;
7890                         reference_iter_node(rbtdbiter);
7891                 } else {
7892                         result = tresult;
7893                         rbtdbiter->node = NULL;
7894                 }
7895         } else
7896                 rbtdbiter->node = NULL;
7897
7898         rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
7899                             ISC_R_SUCCESS : result;
7900 #endif
7901
7902         return (result);
7903 }
7904
7905 static isc_result_t
7906 dbiterator_prev(dns_dbiterator_t *iterator) {
7907         isc_result_t result;
7908         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7909         dns_name_t *name, *origin;
7910         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7911
7912         REQUIRE(rbtdbiter->node != NULL);
7913
7914         if (rbtdbiter->result != ISC_R_SUCCESS)
7915                 return (rbtdbiter->result);
7916
7917         if (rbtdbiter->paused)
7918                 resume_iteration(rbtdbiter);
7919
7920         name = dns_fixedname_name(&rbtdbiter->name);
7921         origin = dns_fixedname_name(&rbtdbiter->origin);
7922         result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
7923         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
7924             !rbtdbiter->nonsec3 &&
7925             &rbtdbiter->nsec3chain == rbtdbiter->current) {
7926                 rbtdbiter->current = &rbtdbiter->chain;
7927                 dns_rbtnodechain_reset(rbtdbiter->current);
7928                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7929                                                name, origin);
7930                 if (result == ISC_R_NOTFOUND)
7931                         result = ISC_R_NOMORE;
7932         }
7933
7934         dereference_iter_node(rbtdbiter);
7935
7936         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
7937                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
7938                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7939                                                   NULL, &rbtdbiter->node);
7940         }
7941
7942         if (result == ISC_R_SUCCESS)
7943                 reference_iter_node(rbtdbiter);
7944
7945         rbtdbiter->result = result;
7946
7947         return (result);
7948 }
7949
7950 static isc_result_t
7951 dbiterator_next(dns_dbiterator_t *iterator) {
7952         isc_result_t result;
7953         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7954         dns_name_t *name, *origin;
7955         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7956
7957         REQUIRE(rbtdbiter->node != NULL);
7958
7959         if (rbtdbiter->result != ISC_R_SUCCESS)
7960                 return (rbtdbiter->result);
7961
7962         if (rbtdbiter->paused)
7963                 resume_iteration(rbtdbiter);
7964
7965         name = dns_fixedname_name(&rbtdbiter->name);
7966         origin = dns_fixedname_name(&rbtdbiter->origin);
7967         result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
7968         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
7969             !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
7970                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7971                 dns_rbtnodechain_reset(rbtdbiter->current);
7972                 result = dns_rbtnodechain_first(rbtdbiter->current,
7973                                                 rbtdb->nsec3, name, origin);
7974                 if (result == ISC_R_NOTFOUND)
7975                         result = ISC_R_NOMORE;
7976         }
7977
7978         dereference_iter_node(rbtdbiter);
7979
7980         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
7981                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
7982                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7983                                                   NULL, &rbtdbiter->node);
7984         }
7985         if (result == ISC_R_SUCCESS)
7986                 reference_iter_node(rbtdbiter);
7987
7988         rbtdbiter->result = result;
7989
7990         return (result);
7991 }
7992
7993 static isc_result_t
7994 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
7995                    dns_name_t *name)
7996 {
7997         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7998         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7999         dns_rbtnode_t *node = rbtdbiter->node;
8000         isc_result_t result;
8001         dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8002         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8003
8004         REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8005         REQUIRE(rbtdbiter->node != NULL);
8006
8007         if (rbtdbiter->paused)
8008                 resume_iteration(rbtdbiter);
8009
8010         if (name != NULL) {
8011                 if (rbtdbiter->common.relative_names)
8012                         origin = NULL;
8013                 result = dns_name_concatenate(nodename, origin, name, NULL);
8014                 if (result != ISC_R_SUCCESS)
8015                         return (result);
8016                 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8017                         result = DNS_R_NEWORIGIN;
8018         } else
8019                 result = ISC_R_SUCCESS;
8020
8021         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8022         new_reference(rbtdb, node);
8023         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8024
8025         *nodep = rbtdbiter->node;
8026
8027         if (iterator->cleaning && result == ISC_R_SUCCESS) {
8028                 isc_result_t expire_result;
8029
8030                 /*
8031                  * If the deletion array is full, flush it before trying
8032                  * to expire the current node.  The current node can't
8033                  * fully deleted while the iteration cursor is still on it.
8034                  */
8035                 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8036                         flush_deletions(rbtdbiter);
8037
8038                 expire_result = expirenode(iterator->db, *nodep, 0);
8039
8040                 /*
8041                  * expirenode() currently always returns success.
8042                  */
8043                 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8044                         unsigned int refs;
8045
8046                         rbtdbiter->deletions[rbtdbiter->delete++] = node;
8047                         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8048                         dns_rbtnode_refincrement(node, &refs);
8049                         INSIST(refs != 0);
8050                         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8051                 }
8052         }
8053
8054         return (result);
8055 }
8056
8057 static isc_result_t
8058 dbiterator_pause(dns_dbiterator_t *iterator) {
8059         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8060         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8061
8062         if (rbtdbiter->result != ISC_R_SUCCESS &&
8063             rbtdbiter->result != ISC_R_NOMORE)
8064                 return (rbtdbiter->result);
8065
8066         if (rbtdbiter->paused)
8067                 return (ISC_R_SUCCESS);
8068
8069         rbtdbiter->paused = ISC_TRUE;
8070
8071         if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8072                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8073                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8074                 rbtdbiter->tree_locked = isc_rwlocktype_none;
8075         }
8076
8077         flush_deletions(rbtdbiter);
8078
8079         return (ISC_R_SUCCESS);
8080 }
8081
8082 static isc_result_t
8083 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8084         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8085         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8086
8087         if (rbtdbiter->result != ISC_R_SUCCESS)
8088                 return (rbtdbiter->result);
8089
8090         return (dns_name_copy(origin, name, NULL));
8091 }
8092
8093 /*%
8094  * Additional cache routines.
8095  */
8096 static isc_result_t
8097 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8098                        dns_rdatatype_t qtype, dns_acache_t *acache,
8099                        dns_zone_t **zonep, dns_db_t **dbp,
8100                        dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8101                        dns_name_t *fname, dns_message_t *msg,
8102                        isc_stdtime_t now)
8103 {
8104         dns_rbtdb_t *rbtdb = rdataset->private1;
8105         dns_rbtnode_t *rbtnode = rdataset->private2;
8106         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8107         unsigned int current_count = rdataset->privateuint4;
8108         unsigned int count;
8109         rdatasetheader_t *header;
8110         nodelock_t *nodelock;
8111         unsigned int total_count;
8112         acachectl_t *acarray;
8113         dns_acacheentry_t *entry;
8114         isc_result_t result;
8115
8116         UNUSED(qtype); /* we do not use this value at least for now */
8117         UNUSED(acache);
8118
8119         header = (struct rdatasetheader *)(raw - sizeof(*header));
8120
8121         total_count = raw[0] * 256 + raw[1];
8122         INSIST(total_count > current_count);
8123         count = total_count - current_count - 1;
8124
8125         acarray = NULL;
8126
8127         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8128         NODE_LOCK(nodelock, isc_rwlocktype_read);
8129
8130         switch (type) {
8131         case dns_rdatasetadditional_fromauth:
8132                 acarray = header->additional_auth;
8133                 break;
8134         case dns_rdatasetadditional_fromcache:
8135                 acarray = NULL;
8136                 break;
8137         case dns_rdatasetadditional_fromglue:
8138                 acarray = header->additional_glue;
8139                 break;
8140         default:
8141                 INSIST(0);
8142         }
8143
8144         if (acarray == NULL) {
8145                 if (type != dns_rdatasetadditional_fromcache)
8146                         dns_acache_countquerymiss(acache);
8147                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8148                 return (ISC_R_NOTFOUND);
8149         }
8150
8151         if (acarray[count].entry == NULL) {
8152                 dns_acache_countquerymiss(acache);
8153                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8154                 return (ISC_R_NOTFOUND);
8155         }
8156
8157         entry = NULL;
8158         dns_acache_attachentry(acarray[count].entry, &entry);
8159
8160         NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8161
8162         result = dns_acache_getentry(entry, zonep, dbp, versionp,
8163                                      nodep, fname, msg, now);
8164
8165         dns_acache_detachentry(&entry);
8166
8167         return (result);
8168 }
8169
8170 static void
8171 acache_callback(dns_acacheentry_t *entry, void **arg) {
8172         dns_rbtdb_t *rbtdb;
8173         dns_rbtnode_t *rbtnode;
8174         nodelock_t *nodelock;
8175         acachectl_t *acarray = NULL;
8176         acache_cbarg_t *cbarg;
8177         unsigned int count;
8178
8179         REQUIRE(arg != NULL);
8180         cbarg = *arg;
8181
8182         /*
8183          * The caller must hold the entry lock.
8184          */
8185
8186         rbtdb = (dns_rbtdb_t *)cbarg->db;
8187         rbtnode = (dns_rbtnode_t *)cbarg->node;
8188
8189         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8190         NODE_LOCK(nodelock, isc_rwlocktype_write);
8191
8192         switch (cbarg->type) {
8193         case dns_rdatasetadditional_fromauth:
8194                 acarray = cbarg->header->additional_auth;
8195                 break;
8196         case dns_rdatasetadditional_fromglue:
8197                 acarray = cbarg->header->additional_glue;
8198                 break;
8199         default:
8200                 INSIST(0);
8201         }
8202
8203         count = cbarg->count;
8204         if (acarray != NULL && acarray[count].entry == entry) {
8205                 acarray[count].entry = NULL;
8206                 INSIST(acarray[count].cbarg == cbarg);
8207                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8208                 acarray[count].cbarg = NULL;
8209         } else
8210                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8211
8212         dns_acache_detachentry(&entry);
8213
8214         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8215
8216         dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8217         dns_db_detach((dns_db_t **)(void*)&rbtdb);
8218
8219         *arg = NULL;
8220 }
8221
8222 static void
8223 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
8224                       acache_cbarg_t **cbargp)
8225 {
8226         acache_cbarg_t *cbarg;
8227
8228         REQUIRE(mctx != NULL);
8229         REQUIRE(entry != NULL);
8230         REQUIRE(cbargp != NULL && *cbargp != NULL);
8231
8232         cbarg = *cbargp;
8233
8234         dns_acache_cancelentry(entry);
8235         dns_db_detachnode(cbarg->db, &cbarg->node);
8236         dns_db_detach(&cbarg->db);
8237
8238         isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
8239
8240         *cbargp = NULL;
8241 }
8242
8243 static isc_result_t
8244 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8245                        dns_rdatatype_t qtype, dns_acache_t *acache,
8246                        dns_zone_t *zone, dns_db_t *db,
8247                        dns_dbversion_t *version, dns_dbnode_t *node,
8248                        dns_name_t *fname)
8249 {
8250         dns_rbtdb_t *rbtdb = rdataset->private1;
8251         dns_rbtnode_t *rbtnode = rdataset->private2;
8252         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8253         unsigned int current_count = rdataset->privateuint4;
8254         rdatasetheader_t *header;
8255         unsigned int total_count, count;
8256         nodelock_t *nodelock;
8257         isc_result_t result;
8258         acachectl_t *acarray;
8259         dns_acacheentry_t *newentry, *oldentry = NULL;
8260         acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8261
8262         UNUSED(qtype);
8263
8264         if (type == dns_rdatasetadditional_fromcache)
8265                 return (ISC_R_SUCCESS);
8266
8267         header = (struct rdatasetheader *)(raw - sizeof(*header));
8268
8269         total_count = raw[0] * 256 + raw[1];
8270         INSIST(total_count > current_count);
8271         count = total_count - current_count - 1; /* should be private data */
8272
8273         newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8274         if (newcbarg == NULL)
8275                 return (ISC_R_NOMEMORY);
8276         newcbarg->type = type;
8277         newcbarg->count = count;
8278         newcbarg->header = header;
8279         newcbarg->db = NULL;
8280         dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8281         newcbarg->node = NULL;
8282         dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8283                           &newcbarg->node);
8284         newentry = NULL;
8285         result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8286                                         acache_callback, newcbarg, &newentry);
8287         if (result != ISC_R_SUCCESS)
8288                 goto fail;
8289         /* Set cache data in the new entry. */
8290         result = dns_acache_setentry(acache, newentry, zone, db,
8291                                      version, node, fname);
8292         if (result != ISC_R_SUCCESS)
8293                 goto fail;
8294
8295         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8296         NODE_LOCK(nodelock, isc_rwlocktype_write);
8297
8298         acarray = NULL;
8299         switch (type) {
8300         case dns_rdatasetadditional_fromauth:
8301                 acarray = header->additional_auth;
8302                 break;
8303         case dns_rdatasetadditional_fromglue:
8304                 acarray = header->additional_glue;
8305                 break;
8306         default:
8307                 INSIST(0);
8308         }
8309
8310         if (acarray == NULL) {
8311                 unsigned int i;
8312
8313                 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
8314                                       sizeof(acachectl_t));
8315
8316                 if (acarray == NULL) {
8317                         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8318                         goto fail;
8319                 }
8320
8321                 for (i = 0; i < total_count; i++) {
8322                         acarray[i].entry = NULL;
8323                         acarray[i].cbarg = NULL;
8324                 }
8325         }
8326         switch (type) {
8327         case dns_rdatasetadditional_fromauth:
8328                 header->additional_auth = acarray;
8329                 break;
8330         case dns_rdatasetadditional_fromglue:
8331                 header->additional_glue = acarray;
8332                 break;
8333         default:
8334                 INSIST(0);
8335         }
8336
8337         if (acarray[count].entry != NULL) {
8338                 /*
8339                  * Swap the entry.  Delay cleaning-up the old entry since
8340                  * it would require a node lock.
8341                  */
8342                 oldentry = acarray[count].entry;
8343                 INSIST(acarray[count].cbarg != NULL);
8344                 oldcbarg = acarray[count].cbarg;
8345         }
8346         acarray[count].entry = newentry;
8347         acarray[count].cbarg = newcbarg;
8348
8349         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8350
8351         if (oldentry != NULL) {
8352                 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
8353                 dns_acache_detachentry(&oldentry);
8354         }
8355
8356         return (ISC_R_SUCCESS);
8357
8358  fail:
8359         if (newcbarg != NULL) {
8360                 if (newentry != NULL) {
8361                         acache_cancelentry(rbtdb->common.mctx, newentry,
8362                                            &newcbarg);
8363                         dns_acache_detachentry(&newentry);
8364                 } else {
8365                         dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
8366                         dns_db_detach(&newcbarg->db);
8367                         isc_mem_put(rbtdb->common.mctx, newcbarg,
8368                             sizeof(*newcbarg));
8369                 }
8370         }
8371
8372         return (result);
8373 }
8374
8375 static isc_result_t
8376 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
8377                        dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
8378 {
8379         dns_rbtdb_t *rbtdb = rdataset->private1;
8380         dns_rbtnode_t *rbtnode = rdataset->private2;
8381         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8382         unsigned int current_count = rdataset->privateuint4;
8383         rdatasetheader_t *header;
8384         nodelock_t *nodelock;
8385         unsigned int total_count, count;
8386         acachectl_t *acarray;
8387         dns_acacheentry_t *entry;
8388         acache_cbarg_t *cbarg;
8389
8390         UNUSED(qtype);          /* we do not use this value at least for now */
8391         UNUSED(acache);
8392
8393         if (type == dns_rdatasetadditional_fromcache)
8394                 return (ISC_R_SUCCESS);
8395
8396         header = (struct rdatasetheader *)(raw - sizeof(*header));
8397
8398         total_count = raw[0] * 256 + raw[1];
8399         INSIST(total_count > current_count);
8400         count = total_count - current_count - 1;
8401
8402         acarray = NULL;
8403         entry = NULL;
8404
8405         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8406         NODE_LOCK(nodelock, isc_rwlocktype_write);
8407
8408         switch (type) {
8409         case dns_rdatasetadditional_fromauth:
8410                 acarray = header->additional_auth;
8411                 break;
8412         case dns_rdatasetadditional_fromglue:
8413                 acarray = header->additional_glue;
8414                 break;
8415         default:
8416                 INSIST(0);
8417         }
8418
8419         if (acarray == NULL) {
8420                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8421                 return (ISC_R_NOTFOUND);
8422         }
8423
8424         entry = acarray[count].entry;
8425         if (entry == NULL) {
8426                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8427                 return (ISC_R_NOTFOUND);
8428         }
8429
8430         acarray[count].entry = NULL;
8431         cbarg = acarray[count].cbarg;
8432         acarray[count].cbarg = NULL;
8433
8434         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8435
8436         if (entry != NULL) {
8437                 if (cbarg != NULL)
8438                         acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
8439                 dns_acache_detachentry(&entry);
8440         }
8441
8442         return (ISC_R_SUCCESS);
8443 }
8444
8445 /*%
8446  * Routines for LRU-based cache management.
8447  */
8448
8449 /*%
8450  * See if a given cache entry that is being reused needs to be updated
8451  * in the LRU-list.  From the LRU management point of view, this function is
8452  * expected to return true for almost all cases.  When used with threads,
8453  * however, this may cause a non-negligible performance penalty because a
8454  * writer lock will have to be acquired before updating the list.
8455  * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
8456  * function returns true if the entry has not been updated for some period of
8457  * time.  We differentiate the NS or glue address case and the others since
8458  * experiments have shown that the former tends to be accessed relatively
8459  * infrequently and the cost of cache miss is higher (e.g., a missing NS records
8460  * may cause external queries at a higher level zone, involving more
8461  * transactions).
8462  *
8463  * Caller must hold the node (read or write) lock.
8464  */
8465 static inline isc_boolean_t
8466 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
8467         if ((header->attributes &
8468              (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
8469                 return (ISC_FALSE);
8470
8471 #if DNS_RBTDB_LIMITLRUUPDATE
8472         if (header->type == dns_rdatatype_ns ||
8473             (header->trust == dns_trust_glue &&
8474              (header->type == dns_rdatatype_a ||
8475               header->type == dns_rdatatype_aaaa))) {
8476                 /*
8477                  * Glue records are updated if at least 60 seconds have passed
8478                  * since the previous update time.
8479                  */
8480                 return (header->last_used + 60 <= now);
8481         }
8482
8483         /* Other records are updated if 5 minutes have passed. */
8484         return (header->last_used + 300 <= now);
8485 #else
8486         UNUSED(now);
8487
8488         return (ISC_TRUE);
8489 #endif
8490 }
8491
8492 /*%
8493  * Update the timestamp of a given cache entry and move it to the head
8494  * of the corresponding LRU list.
8495  *
8496  * Caller must hold the node (write) lock.
8497  *
8498  * Note that the we do NOT touch the heap here, as the TTL has not changed.
8499  */
8500 static void
8501 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8502               isc_stdtime_t now)
8503 {
8504         INSIST(IS_CACHE(rbtdb));
8505
8506         /* To be checked: can we really assume this? XXXMLG */
8507         INSIST(ISC_LINK_LINKED(header, link));
8508
8509         ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
8510         header->last_used = now;
8511         ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
8512 }
8513
8514 /*%
8515  * Purge some expired and/or stale (i.e. unused for some period) cache entries
8516  * under an overmem condition.  To recover from this condition quickly, up to
8517  * 2 entries will be purged.  This process is triggered while adding a new
8518  * entry, and we specifically avoid purging entries in the same LRU bucket as
8519  * the one to which the new entry will belong.  Otherwise, we might purge
8520  * entries of the same name of different RR types while adding RRsets from a
8521  * single response (consider the case where we're adding A and AAAA glue records
8522  * of the same NS name).
8523  */
8524 static void
8525 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
8526               isc_stdtime_t now, isc_boolean_t tree_locked)
8527 {
8528         rdatasetheader_t *header, *header_prev;
8529         unsigned int locknum;
8530         int purgecount = 2;
8531
8532         for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
8533              locknum != locknum_start && purgecount > 0;
8534              locknum = (locknum + 1) % rbtdb->node_lock_count) {
8535                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
8536                           isc_rwlocktype_write);
8537
8538                 header = isc_heap_element(rbtdb->heaps[locknum], 1);
8539                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
8540                         expire_header(rbtdb, header, tree_locked);
8541                         purgecount--;
8542                 }
8543
8544                 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
8545                      header != NULL && purgecount > 0;
8546                      header = header_prev) {
8547                         header_prev = ISC_LIST_PREV(header, link);
8548                         /*
8549                          * Unlink the entry at this point to avoid checking it
8550                          * again even if it's currently used someone else and
8551                          * cannot be purged at this moment.  This entry won't be
8552                          * referenced any more (so unlinking is safe) since the
8553                          * TTL was reset to 0.
8554                          */
8555                         ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
8556                                         link);
8557                         expire_header(rbtdb, header, tree_locked);
8558                         purgecount--;
8559                 }
8560
8561                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
8562                                     isc_rwlocktype_write);
8563         }
8564 }
8565
8566 static void
8567 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8568               isc_boolean_t tree_locked)
8569 {
8570         set_ttl(rbtdb, header, 0);
8571         header->attributes |= RDATASET_ATTR_STALE;
8572         header->node->dirty = 1;
8573
8574         /*
8575          * Caller must hold the node (write) lock.
8576          */
8577
8578         if (dns_rbtnode_refcurrent(header->node) == 0) {
8579                 /*
8580                  * If no one else is using the node, we can clean it up now.
8581                  * We first need to gain a new reference to the node to meet a
8582                  * requirement of decrement_reference().
8583                  */
8584                 new_reference(rbtdb, header->node);
8585                 decrement_reference(rbtdb, header->node, 0,
8586                                     isc_rwlocktype_write,
8587                                     tree_locked ? isc_rwlocktype_write :
8588                                     isc_rwlocktype_none, ISC_FALSE);
8589         }
8590 }