]> CyberLeo.Net >> Repos - FreeBSD/releng/8.2.git/blob - contrib/bind9/lib/dns/rbtdb.c
Fix a problem whereby a corrupt DNS record can cause named to crash. [11:06]
[FreeBSD/releng/8.2.git] / contrib / bind9 / lib / dns / rbtdb.c
1 /*
2  * Copyright (C) 2004-2010  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id: rbtdb.c,v 1.270.12.16.10.6 2010/11/16 07:46:23 marka Exp $ */
19
20 /*! \file */
21
22 /*
23  * Principal Author: Bob Halley
24  */
25
26 #include <config.h>
27
28 /* #define inline */
29
30 #include <isc/event.h>
31 #include <isc/heap.h>
32 #include <isc/mem.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
41 #include <isc/task.h>
42 #include <isc/time.h>
43 #include <isc/util.h>
44
45 #include <dns/acache.h>
46 #include <dns/db.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
50 #include <dns/lib.h>
51 #include <dns/log.h>
52 #include <dns/masterdump.h>
53 #include <dns/nsec.h>
54 #include <dns/nsec3.h>
55 #include <dns/rbt.h>
56 #include <dns/rdata.h>
57 #include <dns/rdataset.h>
58 #include <dns/rdatasetiter.h>
59 #include <dns/rdataslab.h>
60 #include <dns/rdatastruct.h>
61 #include <dns/result.h>
62 #include <dns/stats.h>
63 #include <dns/view.h>
64 #include <dns/zone.h>
65 #include <dns/zonekey.h>
66
67 #ifdef DNS_RBTDB_VERSION64
68 #include "rbtdb64.h"
69 #else
70 #include "rbtdb.h"
71 #endif
72
73 #ifdef DNS_RBTDB_VERSION64
74 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '8')
75 #else
76 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '4')
77 #endif
78
79 /*%
80  * Note that "impmagic" is not the first four bytes of the struct, so
81  * ISC_MAGIC_VALID cannot be used.
82  */
83 #define VALID_RBTDB(rbtdb)      ((rbtdb) != NULL && \
84                                  (rbtdb)->common.impmagic == RBTDB_MAGIC)
85
86 #ifdef DNS_RBTDB_VERSION64
87 typedef isc_uint64_t                    rbtdb_serial_t;
88 /*%
89  * Make casting easier in symbolic debuggers by using different names
90  * for the 64 bit version.
91  */
92 #define dns_rbtdb_t dns_rbtdb64_t
93 #define rdatasetheader_t rdatasetheader64_t
94 #define rbtdb_version_t rbtdb_version64_t
95 #else
96 typedef isc_uint32_t                    rbtdb_serial_t;
97 #endif
98
99 typedef isc_uint32_t                    rbtdb_rdatatype_t;
100
/*
 * An rbtdb_rdatatype_t packs two 16-bit rdata types into 32 bits:
 * the "base" type lives in the low 16 bits and the "ext" type in the
 * high 16 bits.
 *
 * RBTDB_RDATATYPE_VALUE widens 'e' to rbtdb_rdatatype_t *before* the
 * shift.  Shifting the (int-promoted) 16-bit value directly would
 * overflow a signed int for any ext type >= 0x8000, which is undefined
 * behaviour.
 */
#define RBTDB_RDATATYPE_BASE(type)	((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)	((dns_rdatatype_t)((type) >> 16))
#define RBTDB_RDATATYPE_VALUE(b, e) \
		(((rbtdb_rdatatype_t)(e) << 16) | (b))
104
105 #define RBTDB_RDATATYPE_SIGNSEC \
106                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
107 #define RBTDB_RDATATYPE_SIGNSEC3 \
108                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
109 #define RBTDB_RDATATYPE_SIGNS \
110                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
111 #define RBTDB_RDATATYPE_SIGCNAME \
112                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
113 #define RBTDB_RDATATYPE_SIGDNAME \
114                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
115 #define RBTDB_RDATATYPE_NCACHEANY \
116                 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
117
118 /*
119  * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
120  * Using rwlock is effective with regard to lookup performance only when
121  * it is implemented in an efficient way.
122  * Otherwise, it is generally wise to stick to the simple locking since rwlock
123  * would require more memory or can even make lookups slower due to its own
124  * overhead (when it internally calls mutex locks).
125  */
126 #ifdef ISC_RWLOCK_USEATOMIC
127 #define DNS_RBTDB_USERWLOCK 1
128 #else
129 #define DNS_RBTDB_USERWLOCK 0
130 #endif
131
132 #if DNS_RBTDB_USERWLOCK
133 #define RBTDB_INITLOCK(l)       isc_rwlock_init((l), 0, 0)
134 #define RBTDB_DESTROYLOCK(l)    isc_rwlock_destroy(l)
135 #define RBTDB_LOCK(l, t)        RWLOCK((l), (t))
136 #define RBTDB_UNLOCK(l, t)      RWUNLOCK((l), (t))
137 #else
138 #define RBTDB_INITLOCK(l)       isc_mutex_init(l)
139 #define RBTDB_DESTROYLOCK(l)    DESTROYLOCK(l)
140 #define RBTDB_LOCK(l, t)        LOCK(l)
141 #define RBTDB_UNLOCK(l, t)      UNLOCK(l)
142 #endif
143
144 /*
145  * Since node locking is sensitive to both performance and memory footprint,
146  * we need some trick here.  If we have both high-performance rwlock and
147  * high performance and small-memory reference counters, we use rwlock for
148  * node lock and isc_refcount for node references.  In this case, we don't have
149  * to protect the access to the counters by locks.
150  * Otherwise, we simply use ordinary mutex lock for node locking, and use
151  * simple integers as reference counters which is protected by the lock.
152  * In most cases, we can simply use wrapper macros such as NODE_LOCK and
153  * NODE_UNLOCK.  In some other cases, however, we need to protect reference
154  * counters first and then protect other parts of a node as read-only data.
155  * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
156  * provided for these special cases.  When we can use the efficient backend
157  * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
158  * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
159  * section including the access to the reference counter.
160  * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
161  * section is also protected by NODE_STRONGLOCK().
162  */
163 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
164 typedef isc_rwlock_t nodelock_t;
165
166 #define NODE_INITLOCK(l)        isc_rwlock_init((l), 0, 0)
167 #define NODE_DESTROYLOCK(l)     isc_rwlock_destroy(l)
168 #define NODE_LOCK(l, t)         RWLOCK((l), (t))
169 #define NODE_UNLOCK(l, t)       RWUNLOCK((l), (t))
170 #define NODE_TRYUPGRADE(l)      isc_rwlock_tryupgrade(l)
171
172 #define NODE_STRONGLOCK(l)      ((void)0)
173 #define NODE_STRONGUNLOCK(l)    ((void)0)
174 #define NODE_WEAKLOCK(l, t)     NODE_LOCK(l, t)
175 #define NODE_WEAKUNLOCK(l, t)   NODE_UNLOCK(l, t)
176 #define NODE_WEAKDOWNGRADE(l)   isc_rwlock_downgrade(l)
177 #else
178 typedef isc_mutex_t nodelock_t;
179
180 #define NODE_INITLOCK(l)        isc_mutex_init(l)
181 #define NODE_DESTROYLOCK(l)     DESTROYLOCK(l)
182 #define NODE_LOCK(l, t)         LOCK(l)
183 #define NODE_UNLOCK(l, t)       UNLOCK(l)
184 #define NODE_TRYUPGRADE(l)      ISC_R_SUCCESS
185
186 #define NODE_STRONGLOCK(l)      LOCK(l)
187 #define NODE_STRONGUNLOCK(l)    UNLOCK(l)
188 #define NODE_WEAKLOCK(l, t)     ((void)0)
189 #define NODE_WEAKUNLOCK(l, t)   ((void)0)
190 #define NODE_WEAKDOWNGRADE(l)   ((void)0)
191 #endif
192
193 /*%
194  * Whether to rate-limit updating the LRU to avoid possible thread contention.
195  * Our performance measurement has shown the cost is marginal, so it's defined
196  * to be 0 by default either with or without threads.
197  */
198 #ifndef DNS_RBTDB_LIMITLRUUPDATE
199 #define DNS_RBTDB_LIMITLRUUPDATE 0
200 #endif
201
202 /*
203  * Allow clients with a virtual time of up to 5 minutes in the past to see
204  * records that would otherwise have expired.
205  */
206 #define RBTDB_VIRTUAL 300
207
208 struct noqname {
209         dns_name_t      name;
210         void *          neg;
211         void *          negsig;
212         dns_rdatatype_t type;
213 };
214
215 typedef struct acachectl acachectl_t;
216
217 typedef struct rdatasetheader {
218         /*%
219          * Locked by the owning node's lock.
220          */
221         rbtdb_serial_t                  serial;
222         dns_ttl_t                       rdh_ttl;
223         rbtdb_rdatatype_t               type;
224         isc_uint16_t                    attributes;
225         dns_trust_t                     trust;
226         struct noqname                  *noqname;
227         struct noqname                  *closest;
228         /*%<
229          * We don't use the LIST macros, because the LIST structure has
230          * both head and tail pointers, and is doubly linked.
231          */
232
233         struct rdatasetheader           *next;
234         /*%<
235          * If this is the top header for an rdataset, 'next' points
236          * to the top header for the next rdataset (i.e., the next type).
237          * Otherwise, it points up to the header whose down pointer points
238          * at this header.
239          */
240
241         struct rdatasetheader           *down;
242         /*%<
243          * Points to the header for the next older version of
244          * this rdataset.
245          */
246
247         isc_uint32_t                    count;
248         /*%<
249          * Monotonically increased every time this rdataset is bound so that
250          * it is used as the base of the starting point in DNS responses
251          * when the "cyclic" rrset-order is required.  Since the ordering
252          * should not be so crucial, no lock is set for the counter for
253          * performance reasons.
254          */
255
256         acachectl_t                     *additional_auth;
257         acachectl_t                     *additional_glue;
258
259         dns_rbtnode_t                   *node;
260         isc_stdtime_t                   last_used;
261         ISC_LINK(struct rdatasetheader) link;
262
263         unsigned int                    heap_index;
264         /*%<
265          * Used for TTL-based cache cleaning.
266          */
267         isc_stdtime_t                   resign;
268 } rdatasetheader_t;
269
270 typedef ISC_LIST(rdatasetheader_t)      rdatasetheaderlist_t;
271 typedef ISC_LIST(dns_rbtnode_t)         rbtnodelist_t;
272
273 #define RDATASET_ATTR_NONEXISTENT       0x0001
274 #define RDATASET_ATTR_STALE             0x0002
275 #define RDATASET_ATTR_IGNORE            0x0004
276 #define RDATASET_ATTR_RETAIN            0x0008
277 #define RDATASET_ATTR_NXDOMAIN          0x0010
278 #define RDATASET_ATTR_RESIGN            0x0020
279 #define RDATASET_ATTR_STATCOUNT         0x0040
280 #define RDATASET_ATTR_OPTOUT            0x0080
281 #define RDATASET_ATTR_NEGATIVE          0x0100
282
283 typedef struct acache_cbarg {
284         dns_rdatasetadditional_t        type;
285         unsigned int                    count;
286         dns_db_t                        *db;
287         dns_dbnode_t                    *node;
288         rdatasetheader_t                *header;
289 } acache_cbarg_t;
290
291 struct acachectl {
292         dns_acacheentry_t               *entry;
293         acache_cbarg_t                  *cbarg;
294 };
295
296 /*
297  * XXX
298  * When the cache will pre-expire data (due to memory low or other
299  * situations) before the rdataset's TTL has expired, it MUST
300  * respect the RETAIN bit and not expire the data until its TTL is
301  * expired.
302  */
303
304 #undef IGNORE                   /* WIN32 winbase.h defines this. */
305
306 #define EXISTS(header) \
307         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
308 #define NONEXISTENT(header) \
309         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
310 #define IGNORE(header) \
311         (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
312 #define RETAIN(header) \
313         (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
314 #define NXDOMAIN(header) \
315         (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
316 #define RESIGN(header) \
317         (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
318 #define OPTOUT(header) \
319         (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
320 #define NEGATIVE(header) \
321         (((header)->attributes & RDATASET_ATTR_NEGATIVE) != 0)
322
323 #define DEFAULT_NODE_LOCK_COUNT         7       /*%< Should be prime. */
324
325 /*%
326  * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
327  * There is a tradeoff issue about configuring this value: if this is too
328  * small, it may cause heavier contention between threads; if this is too large,
329  * LRU purge algorithm won't work well (entries tend to be purged prematurely).
330  * The default value should work well for most environments, but this can
331  * also be configurable at compilation time via the
332  * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable.  This value must be larger than
333  * 1 due to the assumption of overmem_purge().
334  */
335 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
336 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
337 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
338 #else
339 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
340 #endif
341 #else
342 #define DEFAULT_CACHE_NODE_LOCK_COUNT   16
343 #endif  /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
344
345 typedef struct {
346         nodelock_t                      lock;
347         /* Protected in the refcount routines. */
348         isc_refcount_t                  references;
349         /* Locked by lock. */
350         isc_boolean_t                   exiting;
351 } rbtdb_nodelock_t;
352
353 typedef struct rbtdb_changed {
354         dns_rbtnode_t *                 node;
355         isc_boolean_t                   dirty;
356         ISC_LINK(struct rbtdb_changed)  link;
357 } rbtdb_changed_t;
358
359 typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
360
361 typedef enum {
362         dns_db_insecure,
363         dns_db_partial,
364         dns_db_secure
365 } dns_db_secure_t;
366
367 typedef struct rbtdb_version {
368         /* Not locked */
369         rbtdb_serial_t                  serial;
370         /*
371          * Protected in the refcount routines.
372          * XXXJT: should we change the lock policy based on the refcount
373          * performance?
374          */
375         isc_refcount_t                  references;
376         /* Locked by database lock. */
377         isc_boolean_t                   writer;
378         isc_boolean_t                   commit_ok;
379         rbtdb_changedlist_t             changed_list;
380         rdatasetheaderlist_t            resigned_list;
381         ISC_LINK(struct rbtdb_version)  link;
382         dns_db_secure_t                 secure;
383         isc_boolean_t                   havensec3;
384         /* NSEC3 parameters */
385         dns_hash_t                      hash;
386         isc_uint8_t                     flags;
387         isc_uint16_t                    iterations;
388         isc_uint8_t                     salt_length;
389         unsigned char                   salt[DNS_NSEC3_SALTSIZE];
390 } rbtdb_version_t;
391
392 typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;
393
394 typedef struct {
395         /* Unlocked. */
396         dns_db_t                        common;
397 #if DNS_RBTDB_USERWLOCK
398         isc_rwlock_t                    lock;
399 #else
400         isc_mutex_t                     lock;
401 #endif
402         isc_rwlock_t                    tree_lock;
403         unsigned int                    node_lock_count;
404         rbtdb_nodelock_t *              node_locks;
405         dns_rbtnode_t *                 origin_node;
406         dns_stats_t *                   rrsetstats; /* cache DB only */
407         /* Locked by lock. */
408         unsigned int                    active;
409         isc_refcount_t                  references;
410         unsigned int                    attributes;
411         rbtdb_serial_t                  current_serial;
412         rbtdb_serial_t                  least_serial;
413         rbtdb_serial_t                  next_serial;
414         rbtdb_version_t *               current_version;
415         rbtdb_version_t *               future_version;
416         rbtdb_versionlist_t             open_versions;
417         isc_task_t *                    task;
418         dns_dbnode_t                    *soanode;
419         dns_dbnode_t                    *nsnode;
420
421         /*
422          * This is a linked list used to implement the LRU cache.  There will
423          * be node_lock_count linked lists here.  Nodes in bucket 1 will be
424          * placed on the linked list rdatasets[1].
425          */
426         rdatasetheaderlist_t            *rdatasets;
427
428         /*%
429          * Temporary storage for stale cache nodes and dynamically deleted
430          * nodes that await being cleaned up.
431          */
432         rbtnodelist_t                   *deadnodes;
433
434         /*
435          * Heaps.  Each of these is used for TTL based expiry.
436          */
437         isc_heap_t                      **heaps;
438
439         /* Locked by tree_lock. */
440         dns_rbt_t *                     tree;
441         dns_rbt_t *                     nsec3;
442
443         /* Unlocked */
444         unsigned int                    quantum;
445 } dns_rbtdb_t;
446
447 #define RBTDB_ATTR_LOADED               0x01
448 #define RBTDB_ATTR_LOADING              0x02
449
450 /*%
451  * Search Context
452  */
453 typedef struct {
454         dns_rbtdb_t *           rbtdb;
455         rbtdb_version_t *       rbtversion;
456         rbtdb_serial_t          serial;
457         unsigned int            options;
458         dns_rbtnodechain_t      chain;
459         isc_boolean_t           copy_name;
460         isc_boolean_t           need_cleanup;
461         isc_boolean_t           wild;
462         dns_rbtnode_t *         zonecut;
463         rdatasetheader_t *      zonecut_rdataset;
464         rdatasetheader_t *      zonecut_sigrdataset;
465         dns_fixedname_t         zonecut_name;
466         isc_stdtime_t           now;
467 } rbtdb_search_t;
468
469 /*%
470  * Load Context
471  */
472 typedef struct {
473         dns_rbtdb_t *           rbtdb;
474         isc_stdtime_t           now;
475 } rbtdb_load_t;
476
477 static void rdataset_disassociate(dns_rdataset_t *rdataset);
478 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
479 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
480 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
481 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
482 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
483 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
484                                         dns_name_t *name,
485                                         dns_rdataset_t *neg,
486                                         dns_rdataset_t *negsig);
487 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
488                                         dns_name_t *name,
489                                         dns_rdataset_t *neg,
490                                         dns_rdataset_t *negsig);
491 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
492                                            dns_rdatasetadditional_t type,
493                                            dns_rdatatype_t qtype,
494                                            dns_acache_t *acache,
495                                            dns_zone_t **zonep,
496                                            dns_db_t **dbp,
497                                            dns_dbversion_t **versionp,
498                                            dns_dbnode_t **nodep,
499                                            dns_name_t *fname,
500                                            dns_message_t *msg,
501                                            isc_stdtime_t now);
502 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
503                                            dns_rdatasetadditional_t type,
504                                            dns_rdatatype_t qtype,
505                                            dns_acache_t *acache,
506                                            dns_zone_t *zone,
507                                            dns_db_t *db,
508                                            dns_dbversion_t *version,
509                                            dns_dbnode_t *node,
510                                            dns_name_t *fname);
511 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
512                                            dns_rdataset_t *rdataset,
513                                            dns_rdatasetadditional_t type,
514                                            dns_rdatatype_t qtype);
515 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
516                                               isc_stdtime_t now);
517 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
518                           isc_stdtime_t now);
519 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
520                           isc_boolean_t tree_locked);
521 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
522                           isc_stdtime_t now, isc_boolean_t tree_locked);
523 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
524                                   rdatasetheader_t *newheader);
525 static void prune_tree(isc_task_t *task, isc_event_t *event);
526 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
527 static void rdataset_expire(dns_rdataset_t *rdataset);
528
529 static dns_rdatasetmethods_t rdataset_methods = {
530         rdataset_disassociate,
531         rdataset_first,
532         rdataset_next,
533         rdataset_current,
534         rdataset_clone,
535         rdataset_count,
536         NULL,
537         rdataset_getnoqname,
538         NULL,
539         rdataset_getclosest,
540         rdataset_getadditional,
541         rdataset_setadditional,
542         rdataset_putadditional,
543         rdataset_settrust,
544         rdataset_expire
545 };
546
547 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
548 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
549 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
550 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
551                                  dns_rdataset_t *rdataset);
552
553 static dns_rdatasetitermethods_t rdatasetiter_methods = {
554         rdatasetiter_destroy,
555         rdatasetiter_first,
556         rdatasetiter_next,
557         rdatasetiter_current
558 };
559
560 typedef struct rbtdb_rdatasetiter {
561         dns_rdatasetiter_t              common;
562         rdatasetheader_t *              current;
563 } rbtdb_rdatasetiter_t;
564
565 static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
566 static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
567 static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
568 static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
569                                         dns_name_t *name);
570 static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
571 static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
572 static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
573                                            dns_dbnode_t **nodep,
574                                            dns_name_t *name);
575 static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
576 static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
577                                           dns_name_t *name);
578
579 static dns_dbiteratormethods_t dbiterator_methods = {
580         dbiterator_destroy,
581         dbiterator_first,
582         dbiterator_last,
583         dbiterator_seek,
584         dbiterator_prev,
585         dbiterator_next,
586         dbiterator_current,
587         dbiterator_pause,
588         dbiterator_origin
589 };
590
591 #define DELETION_BATCH_MAX 64
592
593 /*
594  * If 'paused' is ISC_TRUE, then the tree lock is not being held.
595  */
596 typedef struct rbtdb_dbiterator {
597         dns_dbiterator_t                common;
598         isc_boolean_t                   paused;
599         isc_boolean_t                   new_origin;
600         isc_rwlocktype_t                tree_locked;
601         isc_result_t                    result;
602         dns_fixedname_t                 name;
603         dns_fixedname_t                 origin;
604         dns_rbtnodechain_t              chain;
605         dns_rbtnodechain_t              nsec3chain;
606         dns_rbtnodechain_t              *current;
607         dns_rbtnode_t                   *node;
608         dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX];
609         int                             delete;
610         isc_boolean_t                   nsec3only;
611         isc_boolean_t                   nonsec3;
612 } rbtdb_dbiterator_t;
613
614
615 #define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
616 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
617
618 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
619                        isc_event_t *event);
620 static void overmem(dns_db_t *db, isc_boolean_t overmem);
621 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
622                                isc_boolean_t *nsec3createflag);
623
624 /*%
625  * 'init_count' is used to initialize 'newheader->count' which in turn
626  * is used to determine where in the cycle rrset-order cyclic starts.
627  * We don't lock this as we don't care about simultaneous updates.
628  *
629  * Note:
630  *      Both init_count and header->count can be ISC_UINT32_MAX.
631  *      The count on the returned rdataset however can't be as
632  *      that indicates that the database does not implement cyclic
633  *      processing.
634  */
635 static unsigned int init_count;
636
637 /*
638  * Locking
639  *
640  * If a routine is going to lock more than one lock in this module, then
641  * the locking must be done in the following order:
642  *
643  *      Tree Lock
644  *
645  *      Node Lock       (Only one from the set may be locked at one time by
646  *                       any caller)
647  *
648  *      Database Lock
649  *
650  * Failure to follow this hierarchy can result in deadlock.
651  */
652
653 /*
654  * Deleting Nodes
655  *
656  * For zone databases the node for the origin of the zone MUST NOT be deleted.
657  */
658
659
660 /*
661  * DB Routines
662  */
663
664 static void
665 attach(dns_db_t *source, dns_db_t **targetp) {
666         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
667
668         REQUIRE(VALID_RBTDB(rbtdb));
669
670         isc_refcount_increment(&rbtdb->references, NULL);
671
672         *targetp = source;
673 }
674
675 static void
676 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
677         dns_rbtdb_t *rbtdb = event->ev_arg;
678
679         UNUSED(task);
680
681         free_rbtdb(rbtdb, ISC_TRUE, event);
682 }
683
684 static void
685 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
686                   isc_boolean_t increment)
687 {
688         dns_rdatastatstype_t statattributes = 0;
689         dns_rdatastatstype_t base = 0;
690         dns_rdatastatstype_t type;
691
692         /* At the moment we count statistics only for cache DB */
693         INSIST(IS_CACHE(rbtdb));
694
695         if (NXDOMAIN(header))
696                 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
697         else if (RBTDB_RDATATYPE_BASE(header->type) == 0) {
698                 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
699                 base = RBTDB_RDATATYPE_EXT(header->type);
700         } else
701                 base = RBTDB_RDATATYPE_BASE(header->type);
702
703         type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
704         if (increment)
705                 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
706         else
707                 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
708 }
709
710 static void
711 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
712         int idx;
713         isc_heap_t *heap;
714         dns_ttl_t oldttl;
715
716         oldttl = header->rdh_ttl;
717         header->rdh_ttl = newttl;
718
719         if (!IS_CACHE(rbtdb))
720                 return;
721
722         /*
723          * It's possible the rbtdb is not a cache.  If this is the case,
724          * we will not have a heap, and we move on.  If we do, though,
725          * we might need to adjust things.
726          */
727         if (header->heap_index == 0 || newttl == oldttl)
728                 return;
729         idx = header->node->locknum;
730         if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
731             return;
732         heap = rbtdb->heaps[idx];
733
734         if (newttl < oldttl)
735                 isc_heap_increased(heap, header->heap_index);
736         else
737                 isc_heap_decreased(heap, header->heap_index);
738 }
739
740 /*%
741  * These functions allow the heap code to rank the priority of each
742  * element.  It returns ISC_TRUE if v1 happens "sooner" than v2.
743  */
744 static isc_boolean_t
745 ttl_sooner(void *v1, void *v2) {
746         rdatasetheader_t *h1 = v1;
747         rdatasetheader_t *h2 = v2;
748
749         if (h1->rdh_ttl < h2->rdh_ttl)
750                 return (ISC_TRUE);
751         return (ISC_FALSE);
752 }
753
754 static isc_boolean_t
755 resign_sooner(void *v1, void *v2) {
756         rdatasetheader_t *h1 = v1;
757         rdatasetheader_t *h2 = v2;
758
759         if (h1->resign < h2->resign)
760                 return (ISC_TRUE);
761         return (ISC_FALSE);
762 }
763
764 /*%
765  * This function sets the heap index into the header.
766  */
767 static void
768 set_index(void *what, unsigned int index) {
769         rdatasetheader_t *h = what;
770
771         h->heap_index = index;
772 }
773
774 /*%
775  * Work out how many nodes can be deleted in the time between two
776  * requests to the nameserver.  Smooth the resulting number and use it
777  * as a estimate for the number of nodes to be deleted in the next
778  * iteration.
779  */
780 static unsigned int
781 adjust_quantum(unsigned int old, isc_time_t *start) {
782         unsigned int pps = dns_pps;     /* packets per second */
783         unsigned int interval;
784         isc_uint64_t usecs;
785         isc_time_t end;
786         unsigned int new;
787
788         if (pps < 100)
789                 pps = 100;
790         isc_time_now(&end);
791
792         interval = 1000000 / pps;       /* interval in usec */
793         if (interval == 0)
794                 interval = 1;
795         usecs = isc_time_microdiff(&end, start);
796         if (usecs == 0) {
797                 /*
798                  * We were unable to measure the amount of time taken.
799                  * Double the nodes deleted next time.
800                  */
801                 old *= 2;
802                 if (old > 1000)
803                         old = 1000;
804                 return (old);
805         }
806         new = old * interval;
807         new /= (unsigned int)usecs;
808         if (new == 0)
809                 new = 1;
810         else if (new > 1000)
811                 new = 1000;
812
813         /* Smooth */
814         new = (new + old * 3) / 4;
815
816         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
817                       ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
818
819         return (new);
820 }
821
822 static void
823 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
824         unsigned int i;
825         isc_ondestroy_t ondest;
826         isc_result_t result;
827         char buf[DNS_NAME_FORMATSIZE];
828         isc_time_t start;
829
830         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
831                 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
832
833         REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
834         REQUIRE(rbtdb->future_version == NULL);
835
836         if (rbtdb->current_version != NULL) {
837                 unsigned int refs;
838
839                 isc_refcount_decrement(&rbtdb->current_version->references,
840                                        &refs);
841                 INSIST(refs == 0);
842                 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
843                 isc_refcount_destroy(&rbtdb->current_version->references);
844                 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
845                             sizeof(rbtdb_version_t));
846         }
847
848         /*
849          * We assume the number of remaining dead nodes is reasonably small;
850          * the overhead of unlinking all nodes here should be negligible.
851          */
852         for (i = 0; i < rbtdb->node_lock_count; i++) {
853                 dns_rbtnode_t *node;
854
855                 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
856                 while (node != NULL) {
857                         ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
858                         node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
859                 }
860         }
861
862         if (event == NULL)
863                 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
864  again:
865         if (rbtdb->tree != NULL) {
866                 isc_time_now(&start);
867                 result = dns_rbt_destroy2(&rbtdb->tree, rbtdb->quantum);
868                 if (result == ISC_R_QUOTA) {
869                         INSIST(rbtdb->task != NULL);
870                         if (rbtdb->quantum != 0)
871                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
872                                                                 &start);
873                         if (event == NULL)
874                                 event = isc_event_allocate(rbtdb->common.mctx,
875                                                            NULL,
876                                                          DNS_EVENT_FREESTORAGE,
877                                                            free_rbtdb_callback,
878                                                            rbtdb,
879                                                            sizeof(isc_event_t));
880                         if (event == NULL)
881                                 goto again;
882                         isc_task_send(rbtdb->task, &event);
883                         return;
884                 }
885                 INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
886         }
887
888         if (rbtdb->nsec3 != NULL) {
889                 isc_time_now(&start);
890                 result = dns_rbt_destroy2(&rbtdb->nsec3, rbtdb->quantum);
891                 if (result == ISC_R_QUOTA) {
892                         INSIST(rbtdb->task != NULL);
893                         if (rbtdb->quantum != 0)
894                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
895                                                                 &start);
896                         if (event == NULL)
897                                 event = isc_event_allocate(rbtdb->common.mctx,
898                                                            NULL,
899                                                          DNS_EVENT_FREESTORAGE,
900                                                            free_rbtdb_callback,
901                                                            rbtdb,
902                                                            sizeof(isc_event_t));
903                         if (event == NULL)
904                                 goto again;
905                         isc_task_send(rbtdb->task, &event);
906                         return;
907                 }
908                 INSIST(result == ISC_R_SUCCESS && rbtdb->nsec3 == NULL);
909         }
910
911         if (event != NULL)
912                 isc_event_free(&event);
913         if (log) {
914                 if (dns_name_dynamic(&rbtdb->common.origin))
915                         dns_name_format(&rbtdb->common.origin, buf,
916                                         sizeof(buf));
917                 else
918                         strcpy(buf, "<UNKNOWN>");
919                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
920                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
921                               "done free_rbtdb(%s)", buf);
922         }
923         if (dns_name_dynamic(&rbtdb->common.origin))
924                 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
925         for (i = 0; i < rbtdb->node_lock_count; i++) {
926                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
927                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
928         }
929
930         /*
931          * Clean up LRU / re-signing order lists.
932          */
933         if (rbtdb->rdatasets != NULL) {
934                 for (i = 0; i < rbtdb->node_lock_count; i++)
935                         INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
936                 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
937                             rbtdb->node_lock_count *
938                             sizeof(rdatasetheaderlist_t));
939         }
940         /*
941          * Clean up dead node buckets.
942          */
943         if (rbtdb->deadnodes != NULL) {
944                 for (i = 0; i < rbtdb->node_lock_count; i++)
945                         INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
946                 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
947                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
948         }
949         /*
950          * Clean up heap objects.
951          */
952         if (rbtdb->heaps != NULL) {
953                 for (i = 0; i < rbtdb->node_lock_count; i++)
954                         isc_heap_destroy(&rbtdb->heaps[i]);
955                 isc_mem_put(rbtdb->common.mctx, rbtdb->heaps,
956                             rbtdb->node_lock_count *
957                             sizeof(isc_heap_t *));
958         }
959
960         if (rbtdb->rrsetstats != NULL)
961                 dns_stats_detach(&rbtdb->rrsetstats);
962
963         isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
964                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
965         isc_rwlock_destroy(&rbtdb->tree_lock);
966         isc_refcount_destroy(&rbtdb->references);
967         if (rbtdb->task != NULL)
968                 isc_task_detach(&rbtdb->task);
969
970         RBTDB_DESTROYLOCK(&rbtdb->lock);
971         rbtdb->common.magic = 0;
972         rbtdb->common.impmagic = 0;
973         ondest = rbtdb->common.ondest;
974         isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
975         isc_ondestroy_notify(&ondest, rbtdb);
976 }
977
978 static inline void
979 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
980         isc_boolean_t want_free = ISC_FALSE;
981         unsigned int i;
982         unsigned int inactive = 0;
983
984         /* XXX check for open versions here */
985
986         if (rbtdb->soanode != NULL)
987                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
988         if (rbtdb->nsnode != NULL)
989                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
990
991         /*
992          * Even though there are no external direct references, there still
993          * may be nodes in use.
994          */
995         for (i = 0; i < rbtdb->node_lock_count; i++) {
996                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
997                 rbtdb->node_locks[i].exiting = ISC_TRUE;
998                 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
999                 if (isc_refcount_current(&rbtdb->node_locks[i].references)
1000                     == 0) {
1001                         inactive++;
1002                 }
1003         }
1004
1005         if (inactive != 0) {
1006                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1007                 rbtdb->active -= inactive;
1008                 if (rbtdb->active == 0)
1009                         want_free = ISC_TRUE;
1010                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1011                 if (want_free) {
1012                         char buf[DNS_NAME_FORMATSIZE];
1013                         if (dns_name_dynamic(&rbtdb->common.origin))
1014                                 dns_name_format(&rbtdb->common.origin, buf,
1015                                                 sizeof(buf));
1016                         else
1017                                 strcpy(buf, "<UNKNOWN>");
1018                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1019                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1020                                       "calling free_rbtdb(%s)", buf);
1021                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
1022                 }
1023         }
1024 }
1025
1026 static void
1027 detach(dns_db_t **dbp) {
1028         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1029         unsigned int refs;
1030
1031         REQUIRE(VALID_RBTDB(rbtdb));
1032
1033         isc_refcount_decrement(&rbtdb->references, &refs);
1034
1035         if (refs == 0)
1036                 maybe_free_rbtdb(rbtdb);
1037
1038         *dbp = NULL;
1039 }
1040
1041 static void
1042 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1043         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1044         rbtdb_version_t *version;
1045         unsigned int refs;
1046
1047         REQUIRE(VALID_RBTDB(rbtdb));
1048
1049         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1050         version = rbtdb->current_version;
1051         isc_refcount_increment(&version->references, &refs);
1052         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1053
1054         *versionp = (dns_dbversion_t *)version;
1055 }
1056
1057 static inline rbtdb_version_t *
1058 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1059                  unsigned int references, isc_boolean_t writer)
1060 {
1061         isc_result_t result;
1062         rbtdb_version_t *version;
1063
1064         version = isc_mem_get(mctx, sizeof(*version));
1065         if (version == NULL)
1066                 return (NULL);
1067         version->serial = serial;
1068         result = isc_refcount_init(&version->references, references);
1069         if (result != ISC_R_SUCCESS) {
1070                 isc_mem_put(mctx, version, sizeof(*version));
1071                 return (NULL);
1072         }
1073         version->writer = writer;
1074         version->commit_ok = ISC_FALSE;
1075         ISC_LIST_INIT(version->changed_list);
1076         ISC_LIST_INIT(version->resigned_list);
1077         ISC_LINK_INIT(version, link);
1078
1079         return (version);
1080 }
1081
1082 static isc_result_t
1083 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1084         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1085         rbtdb_version_t *version;
1086
1087         REQUIRE(VALID_RBTDB(rbtdb));
1088         REQUIRE(versionp != NULL && *versionp == NULL);
1089         REQUIRE(rbtdb->future_version == NULL);
1090
1091         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1092         RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
1093         version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1094                                    ISC_TRUE);
1095         if (version != NULL) {
1096                 version->commit_ok = ISC_TRUE;
1097                 version->secure = rbtdb->current_version->secure;
1098                 version->havensec3 = rbtdb->current_version->havensec3;
1099                 if (version->havensec3) {
1100                         version->flags = rbtdb->current_version->flags;
1101                         version->iterations =
1102                                 rbtdb->current_version->iterations;
1103                         version->hash = rbtdb->current_version->hash;
1104                         version->salt_length =
1105                                 rbtdb->current_version->salt_length;
1106                         memcpy(version->salt, rbtdb->current_version->salt,
1107                                version->salt_length);
1108                 } else {
1109                         version->flags = 0;
1110                         version->iterations = 0;
1111                         version->hash = 0;
1112                         version->salt_length = 0;
1113                         memset(version->salt, 0, sizeof(version->salt));
1114                 }
1115                 rbtdb->next_serial++;
1116                 rbtdb->future_version = version;
1117         }
1118         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1119
1120         if (version == NULL)
1121                 return (ISC_R_NOMEMORY);
1122
1123         *versionp = version;
1124
1125         return (ISC_R_SUCCESS);
1126 }
1127
1128 static void
1129 attachversion(dns_db_t *db, dns_dbversion_t *source,
1130               dns_dbversion_t **targetp)
1131 {
1132         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1133         rbtdb_version_t *rbtversion = source;
1134         unsigned int refs;
1135
1136         REQUIRE(VALID_RBTDB(rbtdb));
1137
1138         isc_refcount_increment(&rbtversion->references, &refs);
1139         INSIST(refs > 1);
1140
1141         *targetp = rbtversion;
1142 }
1143
1144 static rbtdb_changed_t *
1145 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1146             dns_rbtnode_t *node)
1147 {
1148         rbtdb_changed_t *changed;
1149         unsigned int refs;
1150
1151         /*
1152          * Caller must be holding the node lock if its reference must be
1153          * protected by the lock.
1154          */
1155
1156         changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1157
1158         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1159
1160         REQUIRE(version->writer);
1161
1162         if (changed != NULL) {
1163                 dns_rbtnode_refincrement(node, &refs);
1164                 INSIST(refs != 0);
1165                 changed->node = node;
1166                 changed->dirty = ISC_FALSE;
1167                 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1168         } else
1169                 version->commit_ok = ISC_FALSE;
1170
1171         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1172
1173         return (changed);
1174 }
1175
1176 static void
1177 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1178                  acachectl_t *array)
1179 {
1180         unsigned int count;
1181         unsigned int i;
1182         unsigned char *raw;     /* RDATASLAB */
1183
1184         /*
1185          * The caller must be holding the corresponding node lock.
1186          */
1187
1188         if (array == NULL)
1189                 return;
1190
1191         raw = (unsigned char *)header + sizeof(*header);
1192         count = raw[0] * 256 + raw[1];
1193
1194         /*
1195          * Sanity check: since an additional cache entry has a reference to
1196          * the original DB node (in the callback arg), there should be no
1197          * acache entries when the node can be freed.
1198          */
1199         for (i = 0; i < count; i++)
1200                 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1201
1202         isc_mem_put(mctx, array, count * sizeof(acachectl_t));
1203 }
1204
1205 static inline void
1206 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1207
1208         if (dns_name_dynamic(&(*noqname)->name))
1209                 dns_name_free(&(*noqname)->name, mctx);
1210         if ((*noqname)->neg != NULL)
1211                 isc_mem_put(mctx, (*noqname)->neg,
1212                             dns_rdataslab_size((*noqname)->neg, 0));
1213         if ((*noqname)->negsig != NULL)
1214                 isc_mem_put(mctx, (*noqname)->negsig,
1215                             dns_rdataslab_size((*noqname)->negsig, 0));
1216         isc_mem_put(mctx, *noqname, sizeof(**noqname));
1217         *noqname = NULL;
1218 }
1219
1220 static inline void
1221 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1222 {
1223         ISC_LINK_INIT(h, link);
1224         h->heap_index = 0;
1225
1226 #if TRACE_HEADER
1227         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1228                 fprintf(stderr, "initialized header: %p\n", h);
1229 #else
1230         UNUSED(rbtdb);
1231 #endif
1232 }
1233
1234 static inline rdatasetheader_t *
1235 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1236 {
1237         rdatasetheader_t *h;
1238
1239         h = isc_mem_get(mctx, sizeof(*h));
1240         if (h == NULL)
1241                 return (NULL);
1242
1243 #if TRACE_HEADER
1244         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1245                 fprintf(stderr, "allocated header: %p\n", h);
1246 #endif
1247         init_rdataset(rbtdb, h);
1248         return (h);
1249 }
1250
1251 static inline void
1252 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1253 {
1254         unsigned int size;
1255         int idx;
1256
1257         if (EXISTS(rdataset) &&
1258             (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1259                 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1260         }
1261
1262         idx = rdataset->node->locknum;
1263         if (ISC_LINK_LINKED(rdataset, link)) {
1264                 INSIST(IS_CACHE(rbtdb));
1265                 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1266         }
1267         if (rdataset->heap_index != 0)
1268                 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1269         rdataset->heap_index = 0;
1270
1271         if (rdataset->noqname != NULL)
1272                 free_noqname(mctx, &rdataset->noqname);
1273         if (rdataset->closest != NULL)
1274                 free_noqname(mctx, &rdataset->closest);
1275
1276         free_acachearray(mctx, rdataset, rdataset->additional_auth);
1277         free_acachearray(mctx, rdataset, rdataset->additional_glue);
1278
1279         if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1280                 size = sizeof(*rdataset);
1281         else
1282                 size = dns_rdataslab_size((unsigned char *)rdataset,
1283                                           sizeof(*rdataset));
1284         isc_mem_put(mctx, rdataset, size);
1285 }
1286
1287 static inline void
1288 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1289         rdatasetheader_t *header, *dcurrent;
1290         isc_boolean_t make_dirty = ISC_FALSE;
1291
1292         /*
1293          * Caller must hold the node lock.
1294          */
1295
1296         /*
1297          * We set the IGNORE attribute on rdatasets with serial number
1298          * 'serial'.  When the reference count goes to zero, these rdatasets
1299          * will be cleaned up; until that time, they will be ignored.
1300          */
1301         for (header = node->data; header != NULL; header = header->next) {
1302                 if (header->serial == serial) {
1303                         header->attributes |= RDATASET_ATTR_IGNORE;
1304                         make_dirty = ISC_TRUE;
1305                 }
1306                 for (dcurrent = header->down;
1307                      dcurrent != NULL;
1308                      dcurrent = dcurrent->down) {
1309                         if (dcurrent->serial == serial) {
1310                                 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1311                                 make_dirty = ISC_TRUE;
1312                         }
1313                 }
1314         }
1315         if (make_dirty)
1316                 node->dirty = 1;
1317 }
1318
1319 static inline void
1320 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1321 {
1322         rdatasetheader_t *d, *down_next;
1323
1324         for (d = top->down; d != NULL; d = down_next) {
1325                 down_next = d->down;
1326                 free_rdataset(rbtdb, mctx, d);
1327         }
1328         top->down = NULL;
1329 }
1330
1331 static inline void
1332 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1333         rdatasetheader_t *current, *top_prev, *top_next;
1334         isc_mem_t *mctx = rbtdb->common.mctx;
1335
1336         /*
1337          * Caller must be holding the node lock.
1338          */
1339
1340         top_prev = NULL;
1341         for (current = node->data; current != NULL; current = top_next) {
1342                 top_next = current->next;
1343                 clean_stale_headers(rbtdb, mctx, current);
1344                 /*
1345                  * If current is nonexistent or stale, we can clean it up.
1346                  */
1347                 if ((current->attributes &
1348                      (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1349                         if (top_prev != NULL)
1350                                 top_prev->next = current->next;
1351                         else
1352                                 node->data = current->next;
1353                         free_rdataset(rbtdb, mctx, current);
1354                 } else
1355                         top_prev = current;
1356         }
1357         node->dirty = 0;
1358 }
1359
1360 static inline void
1361 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1362                 rbtdb_serial_t least_serial)
1363 {
1364         rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1365         rdatasetheader_t *top_prev, *top_next;
1366         isc_mem_t *mctx = rbtdb->common.mctx;
1367         isc_boolean_t still_dirty = ISC_FALSE;
1368
1369         /*
1370          * Caller must be holding the node lock.
1371          */
1372         REQUIRE(least_serial != 0);
1373
1374         top_prev = NULL;
1375         for (current = node->data; current != NULL; current = top_next) {
1376                 top_next = current->next;
1377
1378                 /*
1379                  * First, we clean up any instances of multiple rdatasets
1380                  * with the same serial number, or that have the IGNORE
1381                  * attribute.
1382                  */
1383                 dparent = current;
1384                 for (dcurrent = current->down;
1385                      dcurrent != NULL;
1386                      dcurrent = down_next) {
1387                         down_next = dcurrent->down;
1388                         INSIST(dcurrent->serial <= dparent->serial);
1389                         if (dcurrent->serial == dparent->serial ||
1390                             IGNORE(dcurrent)) {
1391                                 if (down_next != NULL)
1392                                         down_next->next = dparent;
1393                                 dparent->down = down_next;
1394                                 free_rdataset(rbtdb, mctx, dcurrent);
1395                         } else
1396                                 dparent = dcurrent;
1397                 }
1398
1399                 /*
1400                  * We've now eliminated all IGNORE datasets with the possible
1401                  * exception of current, which we now check.
1402                  */
1403                 if (IGNORE(current)) {
1404                         down_next = current->down;
1405                         if (down_next == NULL) {
1406                                 if (top_prev != NULL)
1407                                         top_prev->next = current->next;
1408                                 else
1409                                         node->data = current->next;
1410                                 free_rdataset(rbtdb, mctx, current);
1411                                 /*
1412                                  * current no longer exists, so we can
1413                                  * just continue with the loop.
1414                                  */
1415                                 continue;
1416                         } else {
1417                                 /*
1418                                  * Pull up current->down, making it the new
1419                                  * current.
1420                                  */
1421                                 if (top_prev != NULL)
1422                                         top_prev->next = down_next;
1423                                 else
1424                                         node->data = down_next;
1425                                 down_next->next = top_next;
1426                                 free_rdataset(rbtdb, mctx, current);
1427                                 current = down_next;
1428                         }
1429                 }
1430
1431                 /*
1432                  * We now try to find the first down node less than the
1433                  * least serial.
1434                  */
1435                 dparent = current;
1436                 for (dcurrent = current->down;
1437                      dcurrent != NULL;
1438                      dcurrent = down_next) {
1439                         down_next = dcurrent->down;
1440                         if (dcurrent->serial < least_serial)
1441                                 break;
1442                         dparent = dcurrent;
1443                 }
1444
1445                 /*
1446                  * If there is a such an rdataset, delete it and any older
1447                  * versions.
1448                  */
1449                 if (dcurrent != NULL) {
1450                         do {
1451                                 down_next = dcurrent->down;
1452                                 INSIST(dcurrent->serial <= least_serial);
1453                                 free_rdataset(rbtdb, mctx, dcurrent);
1454                                 dcurrent = down_next;
1455                         } while (dcurrent != NULL);
1456                         dparent->down = NULL;
1457                 }
1458
1459                 /*
1460                  * Note.  The serial number of 'current' might be less than
1461                  * least_serial too, but we cannot delete it because it is
1462                  * the most recent version, unless it is a NONEXISTENT
1463                  * rdataset.
1464                  */
1465                 if (current->down != NULL) {
1466                         still_dirty = ISC_TRUE;
1467                         top_prev = current;
1468                 } else {
1469                         /*
1470                          * If this is a NONEXISTENT rdataset, we can delete it.
1471                          */
1472                         if (NONEXISTENT(current)) {
1473                                 if (top_prev != NULL)
1474                                         top_prev->next = current->next;
1475                                 else
1476                                         node->data = current->next;
1477                                 free_rdataset(rbtdb, mctx, current);
1478                         } else
1479                                 top_prev = current;
1480                 }
1481         }
1482         if (!still_dirty)
1483                 node->dirty = 0;
1484 }
1485
1486 /*%
1487  * Clean up dead nodes.  These are nodes which have no references, and
1488  * have no data.  They are dead but we could not or chose not to delete
1489  * them when we deleted all the data at that node because we did not want
1490  * to wait for the tree write lock.
1491  *
1492  * The caller must hold a tree write lock and bucketnum'th node (write) lock.
1493  */
1494 static void
1495 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1496         dns_rbtnode_t *node;
1497         isc_result_t result;
1498         int count = 10;         /* XXXJT: should be adjustable */
1499
1500         node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1501         while (node != NULL && count > 0) {
1502                 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1503
1504                 /*
1505                  * Since we're holding a tree write lock, it should be
1506                  * impossible for this node to be referenced by others.
1507                  */
1508                 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1509                        node->data == NULL);
1510
1511                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1512                 if (node->nsec3)
1513                         result = dns_rbt_deletenode(rbtdb->nsec3, node,
1514                                                     ISC_FALSE);
1515                 else
1516                         result = dns_rbt_deletenode(rbtdb->tree, node,
1517                                                     ISC_FALSE);
1518                 if (result != ISC_R_SUCCESS)
1519                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1520                                       DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1521                                       "cleanup_dead_nodes: "
1522                                       "dns_rbt_deletenode: %s",
1523                                       isc_result_totext(result));
1524                 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1525                 count--;
1526         }
1527 }
1528
1529 /*
1530  * Caller must be holding the node lock if its reference must be protected
1531  * by the lock.
1532  */
1533 static inline void
1534 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1535         unsigned int lockrefs, noderefs;
1536         isc_refcount_t *lockref;
1537
1538         dns_rbtnode_refincrement0(node, &noderefs);
1539         if (noderefs == 1) {    /* this is the first reference to the node */
1540                 lockref = &rbtdb->node_locks[node->locknum].references;
1541                 isc_refcount_increment0(lockref, &lockrefs);
1542                 INSIST(lockrefs != 0);
1543         }
1544         INSIST(noderefs != 0);
1545 }
1546
1547 /*
1548  * This function is assumed to be called when a node is newly referenced
1549  * and can be in the deadnode list.  In that case the node must be retrieved
1550  * from the list because it is going to be used.  In addition, if the caller
1551  * happens to hold a write lock on the tree, it's a good chance to purge dead
1552  * nodes.
1553  * Note: while a new reference is gained in multiple places, there are only very
1554  * few cases where the node can be in the deadnode list (only empty nodes can
1555  * have been added to the list).
1556  */
1557 static inline void
1558 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1559                 isc_rwlocktype_t treelocktype)
1560 {
1561         isc_boolean_t need_relock = ISC_FALSE;
1562
1563         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1564         new_reference(rbtdb, node);
1565
1566         NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1567                       isc_rwlocktype_read);
1568         if (ISC_LINK_LINKED(node, deadlink))
1569                 need_relock = ISC_TRUE;
1570         else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1571                  treelocktype == isc_rwlocktype_write)
1572                 need_relock = ISC_TRUE;
1573         NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1574                         isc_rwlocktype_read);
1575         if (need_relock) {
1576                 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1577                               isc_rwlocktype_write);
1578                 if (ISC_LINK_LINKED(node, deadlink))
1579                         ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1580                                         node, deadlink);
1581                 if (treelocktype == isc_rwlocktype_write)
1582                         cleanup_dead_nodes(rbtdb, node->locknum);
1583                 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1584                                 isc_rwlocktype_write);
1585         }
1586
1587         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1588 }
1589
1590 /*
1591  * Caller must be holding the node lock; either the "strong", read or write
1592  * lock.  Note that the lock must be held even when node references are
1593  * atomically modified; in that case the decrement operation itself does not
1594  * have to be protected, but we must avoid a race condition where multiple
1595  * threads are decreasing the reference to zero simultaneously and at least
1596  * one of them is going to free the node.
1597  * This function returns ISC_TRUE if and only if the node reference decreases
1598  * to zero.
1599  */
/*
 * Parameters, as used by the body below:
 *   least_serial  0 means the caller does not know it; it is then read from
 *                 rbtdb->least_serial under the database lock (zone DBs only).
 *   nlock         node lock type held by the caller; a read lock is swapped
 *                 for a write lock on the slow path and downgraded again
 *                 before returning.
 *   tlock         tree lock type held by the caller (none, read or write);
 *                 anything weaker than write is try-upgraded and restored
 *                 before returning.
 *   pruning       prune_tree() passes ISC_TRUE; it suppresses the dispatch
 *                 of a further prune event for the node.
 * Note that ISC_FALSE is also returned when the count reached zero but the
 * node was referenced again before return (a prune event was queued for it,
 * or another thread gained a reference).
 */
1600 static isc_boolean_t
1601 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1602                     rbtdb_serial_t least_serial,
1603                     isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1604                     isc_boolean_t pruning)
1605 {
1606         isc_result_t result;
1607         isc_boolean_t write_locked;
1608         rbtdb_nodelock_t *nodelock;
1609         unsigned int refs, nrefs;
1610         int bucket = node->locknum;
1611         isc_boolean_t no_reference;
1612
1613         nodelock = &rbtdb->node_locks[bucket];
1614
1615         /*
              * Handle easy and typical case first: the node is clean and still
              * has data or a subtree, so it needs neither cleaning nor deletion
              * and only the counters have to be adjusted.
              */
1616         if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1617                 dns_rbtnode_refdecrement(node, &nrefs);
1618                 INSIST((int)nrefs >= 0);
1619                 if (nrefs == 0) {
1620                         isc_refcount_decrement(&nodelock->references, &refs);
1621                         INSIST((int)refs >= 0);
1622                 }
1623                 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1624         }
1625
1626         /*
              * Upgrade the lock?  The read lock is released and the write lock
              * acquired as two separate steps.
              */
1627         if (nlock == isc_rwlocktype_read) {
1628                 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1629                 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1630         }
1631         dns_rbtnode_refdecrement(node, &nrefs);
1632         INSIST((int)nrefs >= 0);
1633         if (nrefs > 0) {
1634                 /* Restore the lock? */
1635                 if (nlock == isc_rwlocktype_read)
1636                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1637                 return (ISC_FALSE);
1638         }
1639
             /* Last reference gone: discard stale data from a dirty node. */
1640         if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1641                 if (IS_CACHE(rbtdb))
1642                         clean_cache_node(rbtdb, node);
1643                 else {
1644                         if (least_serial == 0) {
1645                                 /*
1646                                  * Caller doesn't know the least serial.
1647                                  * Get it.
1648                                  */
1649                                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1650                                 least_serial = rbtdb->least_serial;
1651                                 RBTDB_UNLOCK(&rbtdb->lock,
1652                                              isc_rwlocktype_read);
1653                         }
1654                         clean_zone_node(rbtdb, node, least_serial);
1655                 }
1656         }
1657
             /*
              * Mirror of new_reference(): the bucket counter was incremented
              * when the node got its first reference, so drop it now that the
              * node's count has reached zero.
              */
1658         isc_refcount_decrement(&nodelock->references, &refs);
1659         INSIST((int)refs >= 0);
1660
1661         /*
1662          * XXXDCL should this only be done for cache zones?
1663          */
1664         if (node->data != NULL || node->down != NULL) {
1665                 /* Restore the lock? */
1666                 if (nlock == isc_rwlocktype_read)
1667                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1668                 return (ISC_TRUE);
1669         }
1670
1671         /*
1672          * Attempt to switch to a write lock on the tree.  If this fails,
1673          * we will add this node to a linked list of nodes in this locking
1674          * bucket which we will free later.
1675          */
1676         if (tlock != isc_rwlocktype_write) {
1677                 /*
1678                  * Locking hierarchy notwithstanding, we don't need to free
1679                  * the node lock before acquiring the tree write lock because
1680                  * we only do a trylock.
1681                  */
1682                 if (tlock == isc_rwlocktype_read)
1683                         result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1684                 else
1685                         result = isc_rwlock_trylock(&rbtdb->tree_lock,
1686                                                     isc_rwlocktype_write);
1687                 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1688                               result == ISC_R_LOCKBUSY);
1689
1690                 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1691         } else
1692                 write_locked = ISC_TRUE;
1693
             /*
              * Assume the node ends up unreferenced; the branches below clear
              * this when a new reference exists by the time we return.
              */
1694         no_reference = ISC_TRUE;
1695         if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1696                 /*
1697                  * We can now delete the node if the reference counter is
1698                  * zero.  This should be typically the case, but a different
1699                  * thread may still gain a (new) reference just before the
1700                  * current thread locks the tree (e.g., in findnode()).
1701                  */
1702
1703                 /*
1704                  * If this node is the only one in the level it's in, deleting
1705                  * this node may recursively make its parent the only node in
1706                  * the parent level; if so, and if no one is currently using
1707                  * the parent node, this is almost the only opportunity to
1708                  * clean it up.  But the recursive cleanup is not that trivial
1709                  * since the child and parent may be in different lock buckets,
1710                  * which would cause a lock order reversal problem.  To avoid
1711                  * the trouble, we'll dispatch a separate event for batch
1712                  * cleaning.  We need to check whether we're deleting the node
1713                  * as a result of pruning to avoid infinite dispatching.
1714                  * Note: pruning happens only when a task has been set for the
1715                  * rbtdb.  If the user of the rbtdb chooses not to set a task,
1716                  * it's their responsibility to purge stale leaves (e.g. by
1717                  * periodic walk-through).
1718                  */
1719                 if (!pruning && node->parent != NULL &&
1720                     node->parent->down == node && node->left == NULL &&
1721                     node->right == NULL && rbtdb->task != NULL) {
1722                         isc_event_t *ev;
1723                         dns_db_t *db;
1724
1725                         ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1726                                                 DNS_EVENT_RBTPRUNE,
1727                                                 prune_tree, node,
1728                                                 sizeof(isc_event_t));
1729                         if (ev != NULL) {
                                     /*
                                      * The queued event holds a node reference
                                      * and a database reference; prune_tree()
                                      * releases both.
                                      */
1730                                 new_reference(rbtdb, node);
1731                                 db = NULL;
1732                                 attach((dns_db_t *)rbtdb, &db);
1733                                 ev->ev_sender = db;
1734                                 isc_task_send(rbtdb->task, &ev);
1735                                 no_reference = ISC_FALSE;
1736                         } else {
1737                                 /*
1738                                  * XXX: this is a weird situation.  We could
1739                                  * ignore this error case, but then the stale
1740                                  * node is unlikely to be purged except via a
1741                                  * rare condition such as manual cleanup.  So
1742                                  * we queue it in the deadnodes list, hoping
1743                                  * the memory shortage is temporary and the node
1744                                  * will be deleted later.
1745                                  */
1746                                 isc_log_write(dns_lctx,
1747                                               DNS_LOGCATEGORY_DATABASE,
1748                                               DNS_LOGMODULE_CACHE,
1749                                               ISC_LOG_INFO,
1750                                               "decrement_reference: failed to "
1751                                               "allocate pruning event");
1752                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1753                                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1754                                                 deadlink);
1755                         }
1756                 } else {
1757                         if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1758                                 char printname[DNS_NAME_FORMATSIZE];
1759
1760                                 isc_log_write(dns_lctx,
1761                                               DNS_LOGCATEGORY_DATABASE,
1762                                               DNS_LOGMODULE_CACHE,
1763                                               ISC_LOG_DEBUG(1),
1764                                               "decrement_reference: "
1765                                               "delete from rbt: %p %s",
1766                                               node,
1767                                               dns_rbt_formatnodename(node,
1768                                                         printname,
1769                                                         sizeof(printname)));
1770                         }
1771
1772                         INSIST(!ISC_LINK_LINKED(node, deadlink));
                             /* Delete from whichever tree owns the node. */
1773                         if (node->nsec3)
1774                                 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1775                                                             ISC_FALSE);
1776                         else
1777                                 result = dns_rbt_deletenode(rbtdb->tree, node,
1778                                                             ISC_FALSE);
1779                         if (result != ISC_R_SUCCESS) {
1780                                 isc_log_write(dns_lctx,
1781                                               DNS_LOGCATEGORY_DATABASE,
1782                                               DNS_LOGMODULE_CACHE,
1783                                               ISC_LOG_WARNING,
1784                                               "decrement_reference: "
1785                                               "dns_rbt_deletenode: %s",
1786                                               isc_result_totext(result));
1787                         }
1788                 }
1789         } else if (dns_rbtnode_refcurrent(node) == 0) {
                     /*
                      * No tree write lock could be obtained: park the node on
                      * this bucket's dead-node list for later deletion.
                      */
1790                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1791                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1792         } else
1793                 no_reference = ISC_FALSE;
1794
1795         /* Restore the lock? */
1796         if (nlock == isc_rwlocktype_read)
1797                 NODE_WEAKDOWNGRADE(&nodelock->lock);
1798
1799         /*
1800          * Relock a read lock, or unlock the write lock if no lock was held.
1801          */
1802         if (tlock == isc_rwlocktype_none)
1803                 if (write_locked)
1804                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1805
1806         if (tlock == isc_rwlocktype_read)
1807                 if (write_locked)
1808                         isc_rwlock_downgrade(&rbtdb->tree_lock);
1809
1810         return (no_reference);
1811 }
1812
1813 /*
1814  * Prune the tree by recursively cleaning-up single leaves.  In the worst
1815  * case, the number of iterations is the number of tree levels, which is at
1816  * most the maximum number of domain name labels, i.e., 127.  In practice, this
1817  * should be much smaller (only a few times), and even the worst case would be
1818  * acceptable for a single event.
1819  */
1820 static void
1821 prune_tree(isc_task_t *task, isc_event_t *event) {
1822         dns_rbtdb_t *rbtdb = event->ev_sender;
1823         dns_rbtnode_t *node = event->ev_arg;
1824         dns_rbtnode_t *parent;
1825         unsigned int locknum;
1826
1827         UNUSED(task);
1828
1829         isc_event_free(&event);
1830
1831         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1832         locknum = node->locknum;
1833         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1834         do {
1835                 parent = node->parent;
1836                 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1837                                     isc_rwlocktype_write, ISC_TRUE);
1838
1839                 if (parent != NULL && parent->down == NULL) {
1840                         /*
1841                          * node was the only down child of the parent and has
1842                          * just been removed.  We'll then need to examine the
1843                          * parent.  Keep the lock if possible; otherwise,
1844                          * release the old lock and acquire one for the parent.
1845                          */
1846                         if (parent->locknum != locknum) {
1847                                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1848                                             isc_rwlocktype_write);
1849                                 locknum = parent->locknum;
1850                                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1851                                           isc_rwlocktype_write);
1852                         }
1853
1854                         /*
1855                          * We need to gain a reference to the node before
1856                          * decrementing it in the next iteration.  In addition,
1857                          * if the node is in the dead-nodes list, extract it
1858                          * from the list beforehand as we do in
1859                          * reactivate_node().
1860                          */
1861                         new_reference(rbtdb, parent);
1862                         if (ISC_LINK_LINKED(parent, deadlink)) {
1863                                 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1864                                                 parent, deadlink);
1865                         }
1866                 } else
1867                         parent = NULL;
1868
1869                 node = parent;
1870         } while (node != NULL);
1871         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1872         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1873
1874         detach((dns_db_t **)&rbtdb);
1875 }
1876
1877 static inline void
1878 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1879                    rbtdb_changedlist_t *cleanup_list)
1880 {
1881         /*
1882          * Caller must be holding the database lock.
1883          */
1884
1885         rbtdb->least_serial = version->serial;
1886         *cleanup_list = version->changed_list;
1887         ISC_LIST_INIT(version->changed_list);
1888 }
1889
1890 static inline void
1891 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1892         rbtdb_changed_t *changed, *next_changed;
1893
1894         /*
1895          * If the changed record is dirty, then
1896          * an update created multiple versions of
1897          * a given rdataset.  We keep this list
1898          * until we're the least open version, at
1899          * which point it's safe to get rid of any
1900          * older versions.
1901          *
1902          * If the changed record isn't dirty, then
1903          * we don't need it anymore since we're
1904          * committing and not rolling back.
1905          *
1906          * The caller must be holding the database lock.
1907          */
1908         for (changed = HEAD(version->changed_list);
1909              changed != NULL;
1910              changed = next_changed) {
1911                 next_changed = NEXT(changed, link);
1912                 if (!changed->dirty) {
1913                         UNLINK(version->changed_list,
1914                                changed, link);
1915                         APPEND(*cleanup_list,
1916                                changed, link);
1917                 }
1918         }
1919 }
1920
1921 static void
1922 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1923         dns_rdataset_t keyset;
1924         dns_rdataset_t nsecset, signsecset;
1925         dns_rdata_t rdata = DNS_RDATA_INIT;
1926         isc_boolean_t haszonekey = ISC_FALSE;
1927         isc_boolean_t hasnsec = ISC_FALSE;
1928         isc_boolean_t hasoptbit = ISC_FALSE;
1929         isc_boolean_t nsec3createflag = ISC_FALSE;
1930         isc_result_t result;
1931
1932         dns_rdataset_init(&keyset);
1933         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1934                                      0, 0, &keyset, NULL);
1935         if (result == ISC_R_SUCCESS) {
1936                 dns_rdata_t keyrdata = DNS_RDATA_INIT;
1937                 result = dns_rdataset_first(&keyset);
1938                 while (result == ISC_R_SUCCESS) {
1939                         dns_rdataset_current(&keyset, &keyrdata);
1940                         if (dns_zonekey_iszonekey(&keyrdata)) {
1941                                 haszonekey = ISC_TRUE;
1942                                 break;
1943                         }
1944                         result = dns_rdataset_next(&keyset);
1945                 }
1946                 dns_rdataset_disassociate(&keyset);
1947         }
1948         if (!haszonekey) {
1949                 version->secure = dns_db_insecure;
1950                 version->havensec3 = ISC_FALSE;
1951                 return;
1952         }
1953
1954         dns_rdataset_init(&nsecset);
1955         dns_rdataset_init(&signsecset);
1956         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
1957                                      0, 0, &nsecset, &signsecset);
1958         if (result == ISC_R_SUCCESS) {
1959                 if (dns_rdataset_isassociated(&signsecset)) {
1960                         hasnsec = ISC_TRUE;
1961                         result = dns_rdataset_first(&nsecset);
1962                         if (result == ISC_R_SUCCESS) {
1963                                 dns_rdataset_current(&nsecset, &rdata);
1964                                 hasoptbit = dns_nsec_typepresent(&rdata,
1965                                                              dns_rdatatype_opt);
1966                         }
1967                         dns_rdataset_disassociate(&signsecset);
1968                 }
1969                 dns_rdataset_disassociate(&nsecset);
1970         }
1971
1972         setnsec3parameters(db, version, &nsec3createflag);
1973
1974         /*
1975          * Do we have a valid NSEC/NSEC3 chain?
1976          */
1977         if (version->havensec3 || (hasnsec && !hasoptbit))
1978                 version->secure = dns_db_secure;
1979         /*
1980          * Do we have a NSEC/NSEC3 chain under creation?
1981          */
1982         else if (hasoptbit || nsec3createflag)
1983                 version->secure = dns_db_partial;
1984         else
1985                 version->secure = dns_db_insecure;
1986 }
1987
1988 /*%<
1989  * Walk the origin node looking for NSEC3PARAM records.
1990  * Cache the nsec3 parameters.
1991  */
1992 static void
1993 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
1994                    isc_boolean_t *nsec3createflag)
1995 {
1996         dns_rbtnode_t *node;
1997         dns_rdata_nsec3param_t nsec3param;
1998         dns_rdata_t rdata = DNS_RDATA_INIT;
1999         isc_region_t region;
2000         isc_result_t result;
2001         rdatasetheader_t *header, *header_next;
2002         unsigned char *raw;             /* RDATASLAB */
2003         unsigned int count, length;
2004         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2005
2006         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2007         version->havensec3 = ISC_FALSE;
2008         node = rbtdb->origin_node;
2009         NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2010                   isc_rwlocktype_read);
2011         for (header = node->data;
2012              header != NULL;
2013              header = header_next) {
2014                 header_next = header->next;
2015                 do {
2016                         if (header->serial <= version->serial &&
2017                             !IGNORE(header)) {
2018                                 if (NONEXISTENT(header))
2019                                         header = NULL;
2020                                 break;
2021                         } else
2022                                 header = header->down;
2023                 } while (header != NULL);
2024
2025                 if (header != NULL &&
2026                     header->type == dns_rdatatype_nsec3param) {
2027                         /*
2028                          * Find A NSEC3PARAM with a supported algorithm.
2029                          */
2030                         raw = (unsigned char *)header + sizeof(*header);
2031                         count = raw[0] * 256 + raw[1]; /* count */
2032 #if DNS_RDATASET_FIXED
2033                         raw += count * 4 + 2;
2034 #else
2035                         raw += 2;
2036 #endif
2037                         while (count-- > 0U) {
2038                                 length = raw[0] * 256 + raw[1];
2039 #if DNS_RDATASET_FIXED
2040                                 raw += 4;
2041 #else
2042                                 raw += 2;
2043 #endif
2044                                 region.base = raw;
2045                                 region.length = length;
2046                                 raw += length;
2047                                 dns_rdata_fromregion(&rdata,
2048                                                      rbtdb->common.rdclass,
2049                                                      dns_rdatatype_nsec3param,
2050                                                      &region);
2051                                 result = dns_rdata_tostruct(&rdata,
2052                                                             &nsec3param,
2053                                                             NULL);
2054                                 INSIST(result == ISC_R_SUCCESS);
2055                                 dns_rdata_reset(&rdata);
2056
2057                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2058                                     !dns_nsec3_supportedhash(nsec3param.hash))
2059                                         continue;
2060
2061 #ifdef RFC5155_STRICT
2062                                 if (nsec3param.flags != 0)
2063                                         continue;
2064 #else
2065                                 if ((nsec3param.flags & DNS_NSEC3FLAG_CREATE)
2066                                     != 0)
2067                                         *nsec3createflag = ISC_TRUE;
2068                                 if ((nsec3param.flags & ~DNS_NSEC3FLAG_OPTOUT)
2069                                     != 0)
2070                                         continue;
2071 #endif
2072
2073                                 memcpy(version->salt, nsec3param.salt,
2074                                        nsec3param.salt_length);
2075                                 version->hash = nsec3param.hash;
2076                                 version->salt_length = nsec3param.salt_length;
2077                                 version->iterations = nsec3param.iterations;
2078                                 version->flags = nsec3param.flags;
2079                                 version->havensec3 = ISC_TRUE;
2080                                 /*
2081                                  * Look for a better algorithm than the
2082                                  * unknown test algorithm.
2083                                  */
2084                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2085                                         goto unlock;
2086                         }
2087                 }
2088         }
2089  unlock:
2090         NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2091                     isc_rwlocktype_read);
2092         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2093 }
2094
2095 static void
2096 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2097         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2098         rbtdb_version_t *version, *cleanup_version, *least_greater;
2099         isc_boolean_t rollback = ISC_FALSE;
2100         rbtdb_changedlist_t cleanup_list;
2101         rdatasetheaderlist_t resigned_list;
2102         rbtdb_changed_t *changed, *next_changed;
2103         rbtdb_serial_t serial, least_serial;
2104         dns_rbtnode_t *rbtnode;
2105         unsigned int refs;
2106         rdatasetheader_t *header;
2107         isc_boolean_t writer;
2108
2109         REQUIRE(VALID_RBTDB(rbtdb));
2110         version = (rbtdb_version_t *)*versionp;
2111
2112         cleanup_version = NULL;
2113         ISC_LIST_INIT(cleanup_list);
2114         ISC_LIST_INIT(resigned_list);
2115
2116         isc_refcount_decrement(&version->references, &refs);
2117         if (refs > 0) {         /* typical and easy case first */
2118                 if (commit) {
2119                         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2120                         INSIST(!version->writer);
2121                         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2122                 }
2123                 goto end;
2124         }
2125
2126         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2127         serial = version->serial;
2128         writer = version->writer;
2129         if (version->writer) {
2130                 if (commit) {
2131                         unsigned cur_ref;
2132                         rbtdb_version_t *cur_version;
2133
2134                         INSIST(version->commit_ok);
2135                         INSIST(version == rbtdb->future_version);
2136                         /*
2137                          * The current version is going to be replaced.
2138                          * Release the (likely last) reference to it from the
2139                          * DB itself and unlink it from the open list.
2140                          */
2141                         cur_version = rbtdb->current_version;
2142                         isc_refcount_decrement(&cur_version->references,
2143                                                &cur_ref);
2144                         if (cur_ref == 0) {
2145                                 if (cur_version->serial == rbtdb->least_serial)
2146                                         INSIST(EMPTY(cur_version->changed_list));
2147                                 UNLINK(rbtdb->open_versions,
2148                                        cur_version, link);
2149                         }
2150                         if (EMPTY(rbtdb->open_versions)) {
2151                                 /*
2152                                  * We're going to become the least open
2153                                  * version.
2154                                  */
2155                                 make_least_version(rbtdb, version,
2156                                                    &cleanup_list);
2157                         } else {
2158                                 /*
2159                                  * Some other open version is the
2160                                  * least version.  We can't cleanup
2161                                  * records that were changed in this
2162                                  * version because the older versions
2163                                  * may still be in use by an open
2164                                  * version.
2165                                  *
2166                                  * We can, however, discard the
2167                                  * changed records for things that
2168                                  * we've added that didn't exist in
2169                                  * prior versions.
2170                                  */
2171                                 cleanup_nondirty(version, &cleanup_list);
2172                         }
2173                         /*
2174                          * If the (soon to be former) current version
2175                          * isn't being used by anyone, we can clean
2176                          * it up.
2177                          */
2178                         if (cur_ref == 0) {
2179                                 cleanup_version = cur_version;
2180                                 APPENDLIST(version->changed_list,
2181                                            cleanup_version->changed_list,
2182                                            link);
2183                         }
2184                         /*
2185                          * Become the current version.
2186                          */
2187                         version->writer = ISC_FALSE;
2188                         rbtdb->current_version = version;
2189                         rbtdb->current_serial = version->serial;
2190                         rbtdb->future_version = NULL;
2191
2192                         /*
2193                          * Keep the current version in the open list, and
2194                          * gain a reference for the DB itself (see the DB
2195                          * creation function below).  This must be the only
2196                          * case where we need to increment the counter from
2197                          * zero and need to use isc_refcount_increment0().
2198                          */
2199                         isc_refcount_increment0(&version->references,
2200                                                 &cur_ref);
2201                         INSIST(cur_ref == 1);
2202                         PREPEND(rbtdb->open_versions,
2203                                 rbtdb->current_version, link);
2204                         resigned_list = version->resigned_list;
2205                         ISC_LIST_INIT(version->resigned_list);
2206                 } else {
2207                         /*
2208                          * We're rolling back this transaction.
2209                          */
2210                         cleanup_list = version->changed_list;
2211                         ISC_LIST_INIT(version->changed_list);
2212                         resigned_list = version->resigned_list;
2213                         ISC_LIST_INIT(version->resigned_list);
2214                         rollback = ISC_TRUE;
2215                         cleanup_version = version;
2216                         rbtdb->future_version = NULL;
2217                 }
2218         } else {
2219                 if (version != rbtdb->current_version) {
2220                         /*
2221                          * There are no external or internal references
2222                          * to this version and it can be cleaned up.
2223                          */
2224                         cleanup_version = version;
2225
2226                         /*
2227                          * Find the version with the least serial
2228                          * number greater than ours.
2229                          */
2230                         least_greater = PREV(version, link);
2231                         if (least_greater == NULL)
2232                                 least_greater = rbtdb->current_version;
2233
2234                         INSIST(version->serial < least_greater->serial);
2235                         /*
2236                          * Is this the least open version?
2237                          */
2238                         if (version->serial == rbtdb->least_serial) {
2239                                 /*
2240                                  * Yes.  Install the new least open
2241                                  * version.
2242                                  */
2243                                 make_least_version(rbtdb,
2244                                                    least_greater,
2245                                                    &cleanup_list);
2246                         } else {
2247                                 /*
2248                                  * Add any unexecuted cleanups to
2249                                  * those of the least greater version.
2250                                  */
2251                                 APPENDLIST(least_greater->changed_list,
2252                                            version->changed_list,
2253                                            link);
2254                         }
2255                 } else if (version->serial == rbtdb->least_serial)
2256                         INSIST(EMPTY(version->changed_list));
2257                 UNLINK(rbtdb->open_versions, version, link);
2258         }
2259         least_serial = rbtdb->least_serial;
2260         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2261
2262         /*
2263          * Update the zone's secure status.
2264          */
2265         if (writer && commit && !IS_CACHE(rbtdb))
2266                 iszonesecure(db, version, rbtdb->origin_node);
2267
2268         if (cleanup_version != NULL) {
2269                 INSIST(EMPTY(cleanup_version->changed_list));
2270                 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2271                             sizeof(*cleanup_version));
2272         }
2273
2274         /*
2275          * Commit/rollback re-signed headers.
2276          */
2277         for (header = HEAD(resigned_list);
2278              header != NULL;
2279              header = HEAD(resigned_list)) {
2280                 nodelock_t *lock;
2281
2282                 ISC_LIST_UNLINK(resigned_list, header, link);
2283
2284                 lock = &rbtdb->node_locks[header->node->locknum].lock;
2285                 NODE_LOCK(lock, isc_rwlocktype_write);
2286                 if (rollback)
2287                         resign_insert(rbtdb, header->node->locknum, header);
2288                 decrement_reference(rbtdb, header->node, least_serial,
2289                                     isc_rwlocktype_write, isc_rwlocktype_none,
2290                                     ISC_FALSE);
2291                 NODE_UNLOCK(lock, isc_rwlocktype_write);
2292         }
2293
2294         if (!EMPTY(cleanup_list)) {
2295                 /*
2296                  * We acquire a tree write lock here in order to make sure
2297                  * that stale nodes will be removed in decrement_reference().
2298                  * If we didn't have the lock, those nodes could miss the
2299                  * chance to be removed until the server stops.  The write lock
2300                  * is expensive, but this event should be rare enough to justify
2301                  * the cost.
2302                  */
2303                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2304                 for (changed = HEAD(cleanup_list);
2305                      changed != NULL;
2306                      changed = next_changed) {
2307                         nodelock_t *lock;
2308
2309                         next_changed = NEXT(changed, link);
2310                         rbtnode = changed->node;
2311                         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2312
2313                         NODE_LOCK(lock, isc_rwlocktype_write);
2314                         /*
2315                          * This is a good opportunity to purge any dead nodes,
2316                          * so use it.
2317                          */
2318                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2319
2320                         if (rollback)
2321                                 rollback_node(rbtnode, serial);
2322                         decrement_reference(rbtdb, rbtnode, least_serial,
2323                                             isc_rwlocktype_write,
2324                                             isc_rwlocktype_write, ISC_FALSE);
2325
2326                         NODE_UNLOCK(lock, isc_rwlocktype_write);
2327
2328                         isc_mem_put(rbtdb->common.mctx, changed,
2329                                     sizeof(*changed));
2330                 }
2331                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2332         }
2333
2334  end:
2335         *versionp = NULL;
2336 }
2337
2338 /*
2339  * Add the necessary magic for the wildcard name 'name'
2340  * to be found in 'rbtdb'.
2341  *
2342  * In order for wildcard matching to work correctly in
2343  * zone_find(), we must ensure that a node for the wildcarding
2344  * level exists in the database, and has its 'find_callback'
2345  * and 'wild' bits set.
2346  *
2347  * E.g. if the wildcard name is "*.sub.example." then we
2348  * must ensure that "sub.example." exists and is marked as
2349  * a wildcard level.
2350  */
2351 static isc_result_t
2352 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2353         isc_result_t result;
2354         dns_name_t foundname;
2355         dns_offsets_t offsets;
2356         unsigned int n;
2357         dns_rbtnode_t *node = NULL;
2358
2359         dns_name_init(&foundname, offsets);
2360         n = dns_name_countlabels(name);
2361         INSIST(n >= 2);
2362         n--;
2363         dns_name_getlabelsequence(name, 1, n, &foundname);
2364         result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2365         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2366                 return (result);
2367         node->nsec3 = 0;
2368         node->find_callback = 1;
2369         node->wild = 1;
2370         return (ISC_R_SUCCESS);
2371 }
2372
2373 static isc_result_t
2374 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2375         isc_result_t result;
2376         dns_name_t foundname;
2377         dns_offsets_t offsets;
2378         unsigned int n, l, i;
2379
2380         dns_name_init(&foundname, offsets);
2381         n = dns_name_countlabels(name);
2382         l = dns_name_countlabels(&rbtdb->common.origin);
2383         i = l + 1;
2384         while (i < n) {
2385                 dns_rbtnode_t *node = NULL;     /* dummy */
2386                 dns_name_getlabelsequence(name, n - i, i, &foundname);
2387                 if (dns_name_iswildcard(&foundname)) {
2388                         result = add_wildcard_magic(rbtdb, &foundname);
2389                         if (result != ISC_R_SUCCESS)
2390                                 return (result);
2391                         result = dns_rbt_addnode(rbtdb->tree, &foundname,
2392                                                  &node);
2393                         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2394                                 return (result);
2395                         node->nsec3 = 0;
2396                 }
2397                 i++;
2398         }
2399         return (ISC_R_SUCCESS);
2400 }
2401
2402 static isc_result_t
2403 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2404          dns_dbnode_t **nodep)
2405 {
2406         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2407         dns_rbtnode_t *node = NULL;
2408         dns_name_t nodename;
2409         isc_result_t result;
2410         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2411
2412         REQUIRE(VALID_RBTDB(rbtdb));
2413
2414         dns_name_init(&nodename, NULL);
2415         RWLOCK(&rbtdb->tree_lock, locktype);
2416         result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2417                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2418         if (result != ISC_R_SUCCESS) {
2419                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2420                 if (!create) {
2421                         if (result == DNS_R_PARTIALMATCH)
2422                                 result = ISC_R_NOTFOUND;
2423                         return (result);
2424                 }
2425                 /*
2426                  * It would be nice to try to upgrade the lock instead of
2427                  * unlocking then relocking.
2428                  */
2429                 locktype = isc_rwlocktype_write;
2430                 RWLOCK(&rbtdb->tree_lock, locktype);
2431                 node = NULL;
2432                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2433                 if (result == ISC_R_SUCCESS) {
2434                         dns_rbt_namefromnode(node, &nodename);
2435 #ifdef DNS_RBT_USEHASH
2436                         node->locknum = node->hashval % rbtdb->node_lock_count;
2437 #else
2438                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2439                                 rbtdb->node_lock_count;
2440 #endif
2441                         node->nsec3 = 0;
2442                         add_empty_wildcards(rbtdb, name);
2443
2444                         if (dns_name_iswildcard(name)) {
2445                                 result = add_wildcard_magic(rbtdb, name);
2446                                 if (result != ISC_R_SUCCESS) {
2447                                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2448                                         return (result);
2449                                 }
2450                         }
2451                 } else if (result != ISC_R_EXISTS) {
2452                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2453                         return (result);
2454                 }
2455         }
2456         reactivate_node(rbtdb, node, locktype);
2457         RWUNLOCK(&rbtdb->tree_lock, locktype);
2458
2459         *nodep = (dns_dbnode_t *)node;
2460
2461         return (ISC_R_SUCCESS);
2462 }
2463
2464 static isc_result_t
2465 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2466               dns_dbnode_t **nodep)
2467 {
2468         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2469         dns_rbtnode_t *node = NULL;
2470         dns_name_t nodename;
2471         isc_result_t result;
2472         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2473
2474         REQUIRE(VALID_RBTDB(rbtdb));
2475
2476         dns_name_init(&nodename, NULL);
2477         RWLOCK(&rbtdb->tree_lock, locktype);
2478         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, &node, NULL,
2479                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2480         if (result != ISC_R_SUCCESS) {
2481                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2482                 if (!create) {
2483                         if (result == DNS_R_PARTIALMATCH)
2484                                 result = ISC_R_NOTFOUND;
2485                         return (result);
2486                 }
2487                 /*
2488                  * It would be nice to try to upgrade the lock instead of
2489                  * unlocking then relocking.
2490                  */
2491                 locktype = isc_rwlocktype_write;
2492                 RWLOCK(&rbtdb->tree_lock, locktype);
2493                 node = NULL;
2494                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
2495                 if (result == ISC_R_SUCCESS) {
2496                         dns_rbt_namefromnode(node, &nodename);
2497 #ifdef DNS_RBT_USEHASH
2498                         node->locknum = node->hashval % rbtdb->node_lock_count;
2499 #else
2500                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2501                                 rbtdb->node_lock_count;
2502 #endif
2503                         node->nsec3 = 1U;
2504                 } else if (result != ISC_R_EXISTS) {
2505                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2506                         return (result);
2507                 }
2508         } else
2509                 INSIST(node->nsec3);
2510         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
2511         new_reference(rbtdb, node);
2512         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
2513         RWUNLOCK(&rbtdb->tree_lock, locktype);
2514
2515         *nodep = (dns_dbnode_t *)node;
2516
2517         return (ISC_R_SUCCESS);
2518 }
2519
2520 static isc_result_t
2521 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2522         rbtdb_search_t *search = arg;
2523         rdatasetheader_t *header, *header_next;
2524         rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2525         rdatasetheader_t *found;
2526         isc_result_t result;
2527         dns_rbtnode_t *onode;
2528
2529         /*
2530          * We only want to remember the topmost zone cut, since it's the one
2531          * that counts, so we'll just continue if we've already found a
2532          * zonecut.
2533          */
2534         if (search->zonecut != NULL)
2535                 return (DNS_R_CONTINUE);
2536
2537         found = NULL;
2538         result = DNS_R_CONTINUE;
2539         onode = search->rbtdb->origin_node;
2540
2541         NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2542                   isc_rwlocktype_read);
2543
2544         /*
2545          * Look for an NS or DNAME rdataset active in our version.
2546          */
2547         ns_header = NULL;
2548         dname_header = NULL;
2549         sigdname_header = NULL;
2550         for (header = node->data; header != NULL; header = header_next) {
2551                 header_next = header->next;
2552                 if (header->type == dns_rdatatype_ns ||
2553                     header->type == dns_rdatatype_dname ||
2554                     header->type == RBTDB_RDATATYPE_SIGDNAME) {
2555                         do {
2556                                 if (header->serial <= search->serial &&
2557                                     !IGNORE(header)) {
2558                                         /*
2559                                          * Is this a "this rdataset doesn't
2560                                          * exist" record?
2561                                          */
2562                                         if (NONEXISTENT(header))
2563                                                 header = NULL;
2564                                         break;
2565                                 } else
2566                                         header = header->down;
2567                         } while (header != NULL);
2568                         if (header != NULL) {
2569                                 if (header->type == dns_rdatatype_dname)
2570                                         dname_header = header;
2571                                 else if (header->type ==
2572                                            RBTDB_RDATATYPE_SIGDNAME)
2573                                         sigdname_header = header;
2574                                 else if (node != onode ||
2575                                          IS_STUB(search->rbtdb)) {
2576                                         /*
2577                                          * We've found an NS rdataset that
2578                                          * isn't at the origin node.  We check
2579                                          * that they're not at the origin node,
2580                                          * because otherwise we'd erroneously
2581                                          * treat the zone top as if it were
2582                                          * a delegation.
2583                                          */
2584                                         ns_header = header;
2585                                 }
2586                         }
2587                 }
2588         }
2589
2590         /*
2591          * Did we find anything?
2592          */
2593         if (dname_header != NULL) {
2594                 /*
2595                  * Note that DNAME has precedence over NS if both exist.
2596                  */
2597                 found = dname_header;
2598                 search->zonecut_sigrdataset = sigdname_header;
2599         } else if (ns_header != NULL) {
2600                 found = ns_header;
2601                 search->zonecut_sigrdataset = NULL;
2602         }
2603
2604         if (found != NULL) {
2605                 /*
2606                  * We increment the reference count on node to ensure that
2607                  * search->zonecut_rdataset will still be valid later.
2608                  */
2609                 new_reference(search->rbtdb, node);
2610                 search->zonecut = node;
2611                 search->zonecut_rdataset = found;
2612                 search->need_cleanup = ISC_TRUE;
2613                 /*
2614                  * Since we've found a zonecut, anything beneath it is
2615                  * glue and is not subject to wildcard matching, so we
2616                  * may clear search->wild.
2617                  */
2618                 search->wild = ISC_FALSE;
2619                 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2620                         /*
2621                          * If the caller does not want to find glue, then
2622                          * this is the best answer and the search should
2623                          * stop now.
2624                          */
2625                         result = DNS_R_PARTIALMATCH;
2626                 } else {
2627                         dns_name_t *zcname;
2628
2629                         /*
2630                          * The search will continue beneath the zone cut.
2631                          * This may or may not be the best match.  In case it
2632                          * is, we need to remember the node name.
2633                          */
2634                         zcname = dns_fixedname_name(&search->zonecut_name);
2635                         RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2636                                       ISC_R_SUCCESS);
2637                         search->copy_name = ISC_TRUE;
2638                 }
2639         } else {
2640                 /*
2641                  * There is no zonecut at this node which is active in this
2642                  * version.
2643                  *
2644                  * If this is a "wild" node and the caller hasn't disabled
2645                  * wildcard matching, remember that we've seen a wild node
2646                  * in case we need to go searching for wildcard matches
2647                  * later on.
2648                  */
2649                 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2650                         search->wild = ISC_TRUE;
2651         }
2652
2653         NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2654                     isc_rwlocktype_read);
2655
2656         return (result);
2657 }
2658
2659 static inline void
2660 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2661               rdatasetheader_t *header, isc_stdtime_t now,
2662               dns_rdataset_t *rdataset)
2663 {
2664         unsigned char *raw;     /* RDATASLAB */
2665
2666         /*
2667          * Caller must be holding the node reader lock.
2668          * XXXJT: technically, we need a writer lock, since we'll increment
2669          * the header count below.  However, since the actual counter value
2670          * doesn't matter, we prioritize performance here.  (We may want to
2671          * use atomic increment when available).
2672          */
2673
2674         if (rdataset == NULL)
2675                 return;
2676
2677         new_reference(rbtdb, node);
2678
2679         INSIST(rdataset->methods == NULL);      /* We must be disassociated. */
2680
2681         rdataset->methods = &rdataset_methods;
2682         rdataset->rdclass = rbtdb->common.rdclass;
2683         rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2684         rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2685         rdataset->ttl = header->rdh_ttl - now;
2686         rdataset->trust = header->trust;
2687         if (NXDOMAIN(header))
2688                 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2689         if (OPTOUT(header))
2690                 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2691         rdataset->private1 = rbtdb;
2692         rdataset->private2 = node;
2693         raw = (unsigned char *)header + sizeof(*header);
2694         rdataset->private3 = raw;
2695         rdataset->count = header->count++;
2696         if (rdataset->count == ISC_UINT32_MAX)
2697                 rdataset->count = 0;
2698
2699         /*
2700          * Reset iterator state.
2701          */
2702         rdataset->privateuint4 = 0;
2703         rdataset->private5 = NULL;
2704
2705         /*
2706          * Add noqname proof.
2707          */
2708         rdataset->private6 = header->noqname;
2709         if (rdataset->private6 != NULL)
2710                 rdataset->attributes |=  DNS_RDATASETATTR_NOQNAME;
2711         rdataset->private7 = header->closest;
2712         if (rdataset->private7 != NULL)
2713                 rdataset->attributes |=  DNS_RDATASETATTR_CLOSEST;
2714
2715         /*
2716          * Copy out re-signing information.
2717          */
2718         if (RESIGN(header)) {
2719                 rdataset->attributes |=  DNS_RDATASETATTR_RESIGN;
2720                 rdataset->resign = header->resign;
2721         } else
2722                 rdataset->resign = 0;
2723 }
2724
2725 static inline isc_result_t
2726 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2727                  dns_name_t *foundname, dns_rdataset_t *rdataset,
2728                  dns_rdataset_t *sigrdataset)
2729 {
2730         isc_result_t result;
2731         dns_name_t *zcname;
2732         rbtdb_rdatatype_t type;
2733         dns_rbtnode_t *node;
2734
2735         /*
2736          * The caller MUST NOT be holding any node locks.
2737          */
2738
2739         node = search->zonecut;
2740         type = search->zonecut_rdataset->type;
2741
2742         /*
2743          * If we have to set foundname, we do it before anything else.
2744          * If we were to set foundname after we had set nodep or bound the
2745          * rdataset, then we'd have to undo that work if dns_name_copy()
2746          * failed.  By setting foundname first, there's nothing to undo if
2747          * we have trouble.
2748          */
2749         if (foundname != NULL && search->copy_name) {
2750                 zcname = dns_fixedname_name(&search->zonecut_name);
2751                 result = dns_name_copy(zcname, foundname, NULL);
2752                 if (result != ISC_R_SUCCESS)
2753                         return (result);
2754         }
2755         if (nodep != NULL) {
2756                 /*
2757                  * Note that we don't have to increment the node's reference
2758                  * count here because we're going to use the reference we
2759                  * already have in the search block.
2760                  */
2761                 *nodep = node;
2762                 search->need_cleanup = ISC_FALSE;
2763         }
2764         if (rdataset != NULL) {
2765                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2766                           isc_rwlocktype_read);
2767                 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2768                               search->now, rdataset);
2769                 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2770                         bind_rdataset(search->rbtdb, node,
2771                                       search->zonecut_sigrdataset,
2772                                       search->now, sigrdataset);
2773                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2774                             isc_rwlocktype_read);
2775         }
2776
2777         if (type == dns_rdatatype_dname)
2778                 return (DNS_R_DNAME);
2779         return (DNS_R_DELEGATION);
2780 }
2781
2782 static inline isc_boolean_t
2783 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2784            dns_rbtnode_t *node)
2785 {
2786         unsigned char *raw;     /* RDATASLAB */
2787         unsigned int count, size;
2788         dns_name_t ns_name;
2789         isc_boolean_t valid = ISC_FALSE;
2790         dns_offsets_t offsets;
2791         isc_region_t region;
2792         rdatasetheader_t *header;
2793
2794         /*
2795          * No additional locking is required.
2796          */
2797
2798         /*
2799          * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
2800          * if it occurs at a zone cut, but is not valid below it.
2801          */
2802         if (type == dns_rdatatype_ns) {
2803                 if (node != search->zonecut) {
2804                         return (ISC_FALSE);
2805                 }
2806         } else if (type != dns_rdatatype_a &&
2807                    type != dns_rdatatype_aaaa &&
2808                    type != dns_rdatatype_a6) {
2809                 return (ISC_FALSE);
2810         }
2811
2812         header = search->zonecut_rdataset;
2813         raw = (unsigned char *)header + sizeof(*header);
2814         count = raw[0] * 256 + raw[1];
2815 #if DNS_RDATASET_FIXED
2816         raw += 2 + (4 * count);
2817 #else
2818         raw += 2;
2819 #endif
2820
2821         while (count > 0) {
2822                 count--;
2823                 size = raw[0] * 256 + raw[1];
2824 #if DNS_RDATASET_FIXED
2825                 raw += 4;
2826 #else
2827                 raw += 2;
2828 #endif
2829                 region.base = raw;
2830                 region.length = size;
2831                 raw += size;
2832                 /*
2833                  * XXX Until we have rdata structures, we have no choice but
2834                  * to directly access the rdata format.
2835                  */
2836                 dns_name_init(&ns_name, offsets);
2837                 dns_name_fromregion(&ns_name, &region);
2838                 if (dns_name_compare(&ns_name, name) == 0) {
2839                         valid = ISC_TRUE;
2840                         break;
2841                 }
2842         }
2843
2844         return (valid);
2845 }
2846
2847 static inline isc_boolean_t
2848 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2849             dns_name_t *name)
2850 {
2851         dns_fixedname_t fnext;
2852         dns_fixedname_t forigin;
2853         dns_name_t *next;
2854         dns_name_t *origin;
2855         dns_name_t prefix;
2856         dns_rbtdb_t *rbtdb;
2857         dns_rbtnode_t *node;
2858         isc_result_t result;
2859         isc_boolean_t answer = ISC_FALSE;
2860         rdatasetheader_t *header;
2861
2862         rbtdb = search->rbtdb;
2863
2864         dns_name_init(&prefix, NULL);
2865         dns_fixedname_init(&fnext);
2866         next = dns_fixedname_name(&fnext);
2867         dns_fixedname_init(&forigin);
2868         origin = dns_fixedname_name(&forigin);
2869
2870         result = dns_rbtnodechain_next(chain, NULL, NULL);
2871         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2872                 node = NULL;
2873                 result = dns_rbtnodechain_current(chain, &prefix,
2874                                                   origin, &node);
2875                 if (result != ISC_R_SUCCESS)
2876                         break;
2877                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2878                           isc_rwlocktype_read);
2879                 for (header = node->data;
2880                      header != NULL;
2881                      header = header->next) {
2882                         if (header->serial <= search->serial &&
2883                             !IGNORE(header) && EXISTS(header))
2884                                 break;
2885                 }
2886                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2887                             isc_rwlocktype_read);
2888                 if (header != NULL)
2889                         break;
2890                 result = dns_rbtnodechain_next(chain, NULL, NULL);
2891         }
2892         if (result == ISC_R_SUCCESS)
2893                 result = dns_name_concatenate(&prefix, origin, next, NULL);
2894         if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
2895                 answer = ISC_TRUE;
2896         return (answer);
2897 }
2898
2899 static inline isc_boolean_t
2900 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2901         dns_fixedname_t fnext;
2902         dns_fixedname_t forigin;
2903         dns_fixedname_t fprev;
2904         dns_name_t *next;
2905         dns_name_t *origin;
2906         dns_name_t *prev;
2907         dns_name_t name;
2908         dns_name_t rname;
2909         dns_name_t tname;
2910         dns_rbtdb_t *rbtdb;
2911         dns_rbtnode_t *node;
2912         dns_rbtnodechain_t chain;
2913         isc_boolean_t check_next = ISC_TRUE;
2914         isc_boolean_t check_prev = ISC_TRUE;
2915         isc_boolean_t answer = ISC_FALSE;
2916         isc_result_t result;
2917         rdatasetheader_t *header;
2918         unsigned int n;
2919
2920         rbtdb = search->rbtdb;
2921
2922         dns_name_init(&name, NULL);
2923         dns_name_init(&tname, NULL);
2924         dns_name_init(&rname, NULL);
2925         dns_fixedname_init(&fnext);
2926         next = dns_fixedname_name(&fnext);
2927         dns_fixedname_init(&fprev);
2928         prev = dns_fixedname_name(&fprev);
2929         dns_fixedname_init(&forigin);
2930         origin = dns_fixedname_name(&forigin);
2931
2932         /*
2933          * Find if qname is at or below a empty node.
2934          * Use our own copy of the chain.
2935          */
2936
2937         chain = search->chain;
2938         do {
2939                 node = NULL;
2940                 result = dns_rbtnodechain_current(&chain, &name,
2941                                                   origin, &node);
2942                 if (result != ISC_R_SUCCESS)
2943                         break;
2944                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2945                           isc_rwlocktype_read);
2946                 for (header = node->data;
2947                      header != NULL;
2948                      header = header->next) {
2949                         if (header->serial <= search->serial &&
2950                             !IGNORE(header) && EXISTS(header))
2951                                 break;
2952                 }
2953                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2954                             isc_rwlocktype_read);
2955                 if (header != NULL)
2956                         break;
2957                 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
2958         } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
2959         if (result == ISC_R_SUCCESS)
2960                 result = dns_name_concatenate(&name, origin, prev, NULL);
2961         if (result != ISC_R_SUCCESS)
2962                 check_prev = ISC_FALSE;
2963
2964         result = dns_rbtnodechain_next(&chain, NULL, NULL);
2965         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2966                 node = NULL;
2967                 result = dns_rbtnodechain_current(&chain, &name,
2968                                                   origin, &node);
2969                 if (result != ISC_R_SUCCESS)
2970                         break;
2971                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2972                           isc_rwlocktype_read);
2973                 for (header = node->data;
2974                      header != NULL;
2975                      header = header->next) {
2976                         if (header->serial <= search->serial &&
2977                             !IGNORE(header) && EXISTS(header))
2978                                 break;
2979                 }
2980                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2981                             isc_rwlocktype_read);
2982                 if (header != NULL)
2983                         break;
2984                 result = dns_rbtnodechain_next(&chain, NULL, NULL);
2985         }
2986         if (result == ISC_R_SUCCESS)
2987                 result = dns_name_concatenate(&name, origin, next, NULL);
2988         if (result != ISC_R_SUCCESS)
2989                 check_next = ISC_FALSE;
2990
2991         dns_name_clone(qname, &rname);
2992
2993         /*
2994          * Remove the wildcard label to find the terminal name.
2995          */
2996         n = dns_name_countlabels(wname);
2997         dns_name_getlabelsequence(wname, 1, n - 1, &tname);
2998
2999         do {
3000                 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3001                     (check_next && dns_name_issubdomain(next, &rname))) {
3002                         answer = ISC_TRUE;
3003                         break;
3004                 }
3005                 /*
3006                  * Remove the left hand label.
3007                  */
3008                 n = dns_name_countlabels(&rname);
3009                 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3010         } while (!dns_name_equal(&rname, &tname));
3011         return (answer);
3012 }
3013
3014 static inline isc_result_t
3015 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3016               dns_name_t *qname)
3017 {
3018         unsigned int i, j;
3019         dns_rbtnode_t *node, *level_node, *wnode;
3020         rdatasetheader_t *header;
3021         isc_result_t result = ISC_R_NOTFOUND;
3022         dns_name_t name;
3023         dns_name_t *wname;
3024         dns_fixedname_t fwname;
3025         dns_rbtdb_t *rbtdb;
3026         isc_boolean_t done, wild, active;
3027         dns_rbtnodechain_t wchain;
3028
3029         /*
3030          * Caller must be holding the tree lock and MUST NOT be holding
3031          * any node locks.
3032          */
3033
3034         /*
3035          * Examine each ancestor level.  If the level's wild bit
3036          * is set, then construct the corresponding wildcard name and
3037          * search for it.  If the wildcard node exists, and is active in
3038          * this version, we're done.  If not, then we next check to see
3039          * if the ancestor is active in this version.  If so, then there
3040          * can be no possible wildcard match and again we're done.  If not,
3041          * continue the search.
3042          */
3043
3044         rbtdb = search->rbtdb;
3045         i = search->chain.level_matches;
3046         done = ISC_FALSE;
3047         node = *nodep;
3048         do {
3049                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3050                           isc_rwlocktype_read);
3051
3052                 /*
3053                  * First we try to figure out if this node is active in
3054                  * the search's version.  We do this now, even though we
3055                  * may not need the information, because it simplifies the
3056                  * locking and code flow.
3057                  */
3058                 for (header = node->data;
3059                      header != NULL;
3060                      header = header->next) {
3061                         if (header->serial <= search->serial &&
3062                             !IGNORE(header) && EXISTS(header))
3063                                 break;
3064                 }
3065                 if (header != NULL)
3066                         active = ISC_TRUE;
3067                 else
3068                         active = ISC_FALSE;
3069
3070                 if (node->wild)
3071                         wild = ISC_TRUE;
3072                 else
3073                         wild = ISC_FALSE;
3074
3075                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3076                             isc_rwlocktype_read);
3077
3078                 if (wild) {
3079                         /*
3080                          * Construct the wildcard name for this level.
3081                          */
3082                         dns_name_init(&name, NULL);
3083                         dns_rbt_namefromnode(node, &name);
3084                         dns_fixedname_init(&fwname);
3085                         wname = dns_fixedname_name(&fwname);
3086                         result = dns_name_concatenate(dns_wildcardname, &name,
3087                                                       wname, NULL);
3088                         j = i;
3089                         while (result == ISC_R_SUCCESS && j != 0) {
3090                                 j--;
3091                                 level_node = search->chain.levels[j];
3092                                 dns_name_init(&name, NULL);
3093                                 dns_rbt_namefromnode(level_node, &name);
3094                                 result = dns_name_concatenate(wname,
3095                                                               &name,
3096                                                               wname,
3097                                                               NULL);
3098                         }
3099                         if (result != ISC_R_SUCCESS)
3100                                 break;
3101
3102                         wnode = NULL;
3103                         dns_rbtnodechain_init(&wchain, NULL);
3104                         result = dns_rbt_findnode(rbtdb->tree, wname,
3105                                                   NULL, &wnode, &wchain,
3106                                                   DNS_RBTFIND_EMPTYDATA,
3107                                                   NULL, NULL);
3108                         if (result == ISC_R_SUCCESS) {
3109                                 nodelock_t *lock;
3110
3111                                 /*
3112                                  * We have found the wildcard node.  If it
3113                                  * is active in the search's version, we're
3114                                  * done.
3115                                  */
3116                                 lock = &rbtdb->node_locks[wnode->locknum].lock;
3117                                 NODE_LOCK(lock, isc_rwlocktype_read);
3118                                 for (header = wnode->data;
3119                                      header != NULL;
3120                                      header = header->next) {
3121                                         if (header->serial <= search->serial &&
3122                                             !IGNORE(header) && EXISTS(header))
3123                                                 break;
3124                                 }
3125                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3126                                 if (header != NULL ||
3127                                     activeempty(search, &wchain, wname)) {
3128                                         if (activeemtpynode(search, qname,
3129                                                             wname)) {
3130                                                 return (ISC_R_NOTFOUND);
3131                                         }
3132                                         /*
3133                                          * The wildcard node is active!
3134                                          *
3135                                          * Note: result is still ISC_R_SUCCESS
3136                                          * so we don't have to set it.
3137                                          */
3138                                         *nodep = wnode;
3139                                         break;
3140                                 }
3141                         } else if (result != ISC_R_NOTFOUND &&
3142                                    result != DNS_R_PARTIALMATCH) {
3143                                 /*
3144                                  * An error has occurred.  Bail out.
3145                                  */
3146                                 break;
3147                         }
3148                 }
3149
3150                 if (active) {
3151                         /*
3152                          * The level node is active.  Any wildcarding
3153                          * present at higher levels has no
3154                          * effect and we're done.
3155                          */
3156                         result = ISC_R_NOTFOUND;
3157                         break;
3158                 }
3159
3160                 if (i > 0) {
3161                         i--;
3162                         node = search->chain.levels[i];
3163                 } else
3164                         done = ISC_TRUE;
3165         } while (!done);
3166
3167         return (result);
3168 }
3169
3170 static isc_boolean_t
3171 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3172 {
3173         dns_rdata_t rdata = DNS_RDATA_INIT;
3174         dns_rdata_nsec3_t nsec3;
3175         unsigned char *raw;                     /* RDATASLAB */
3176         unsigned int rdlen, count;
3177         isc_region_t region;
3178         isc_result_t result;
3179
3180         REQUIRE(header->type == dns_rdatatype_nsec3);
3181
3182         raw = (unsigned char *)header + sizeof(*header);
3183         count = raw[0] * 256 + raw[1]; /* count */
3184 #if DNS_RDATASET_FIXED
3185         raw += count * 4 + 2;
3186 #else
3187         raw += 2;
3188 #endif
3189         while (count-- > 0) {
3190                 rdlen = raw[0] * 256 + raw[1];
3191 #if DNS_RDATASET_FIXED
3192                 raw += 4;
3193 #else
3194                 raw += 2;
3195 #endif
3196                 region.base = raw;
3197                 region.length = rdlen;
3198                 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3199                                      dns_rdatatype_nsec3, &region);
3200                 raw += rdlen;
3201                 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3202                 INSIST(result == ISC_R_SUCCESS);
3203                 if (nsec3.hash == search->rbtversion->hash &&
3204                     nsec3.iterations == search->rbtversion->iterations &&
3205                     nsec3.salt_length == search->rbtversion->salt_length &&
3206                     memcmp(nsec3.salt, search->rbtversion->salt,
3207                            nsec3.salt_length) == 0)
3208                         return (ISC_TRUE);
3209                 dns_rdata_reset(&rdata);
3210         }
3211         return (ISC_FALSE);
3212 }
3213
3214 /*
3215  * Find node of the NSEC/NSEC3 record that is 'name'.
3216  */
3217 static inline isc_result_t
3218 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3219                   dns_name_t *foundname, dns_rdataset_t *rdataset,
3220                   dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3221                   dns_db_secure_t secure)
3222 {
3223         dns_rbtnode_t *node;
3224         rdatasetheader_t *header, *header_next, *found, *foundsig;
3225         isc_boolean_t empty_node;
3226         isc_result_t result;
3227         dns_fixedname_t fname, forigin;
3228         dns_name_t *name, *origin;
3229         dns_rdatatype_t type;
3230         rbtdb_rdatatype_t sigtype;
3231         isc_boolean_t wraps;
3232         isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3233
3234         if (tree == search->rbtdb->nsec3) {
3235                 type = dns_rdatatype_nsec3;
3236                 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3237                 wraps = ISC_TRUE;
3238         } else {
3239                 type = dns_rdatatype_nsec;
3240                 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3241                 wraps = ISC_FALSE;
3242         }
3243
3244  again:
3245         do {
3246                 node = NULL;
3247                 dns_fixedname_init(&fname);
3248                 name = dns_fixedname_name(&fname);
3249                 dns_fixedname_init(&forigin);
3250                 origin = dns_fixedname_name(&forigin);
3251                 result = dns_rbtnodechain_current(&search->chain, name,
3252                                                   origin, &node);
3253                 if (result != ISC_R_SUCCESS)
3254                         return (result);
3255                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3256                           isc_rwlocktype_read);
3257                 found = NULL;
3258                 foundsig = NULL;
3259                 empty_node = ISC_TRUE;
3260                 for (header = node->data;
3261                      header != NULL;
3262                      header = header_next) {
3263                         header_next = header->next;
3264                         /*
3265                          * Look for an active, extant NSEC or RRSIG NSEC.
3266                          */
3267                         do {
3268                                 if (header->serial <= search->serial &&
3269                                     !IGNORE(header)) {
3270                                         /*
3271                                          * Is this a "this rdataset doesn't
3272                                          * exist" record?
3273                                          */
3274                                         if (NONEXISTENT(header))
3275                                                 header = NULL;
3276                                         break;
3277                                 } else
3278                                         header = header->down;
3279                         } while (header != NULL);
3280                         if (header != NULL) {
3281                                 /*
3282                                  * We now know that there is at least one
3283                                  * active rdataset at this node.
3284                                  */
3285                                 empty_node = ISC_FALSE;
3286                                 if (header->type == type) {
3287                                         found = header;
3288                                         if (foundsig != NULL)
3289                                                 break;
3290                                 } else if (header->type == sigtype) {
3291                                         foundsig = header;
3292                                         if (found != NULL)
3293                                                 break;
3294                                 }
3295                         }
3296                 }
3297                 if (!empty_node) {
3298                         if (found != NULL && search->rbtversion->havensec3 &&
3299                             found->type == dns_rdatatype_nsec3 &&
3300                             !matchparams(found, search)) {
3301                                 empty_node = ISC_TRUE;
3302                                 found = NULL;
3303                                 foundsig = NULL;
3304                                 result = dns_rbtnodechain_prev(&search->chain,
3305                                                                NULL, NULL);
3306                         } else if (found != NULL &&
3307                                    (foundsig != NULL || !need_sig))
3308                         {
3309                                 /*
3310                                  * We've found the right NSEC/NSEC3 record.
3311                                  *
3312                                  * Note: for this to really be the right
3313                                  * NSEC record, it's essential that the NSEC
3314                                  * records of any nodes obscured by a zone
3315                                  * cut have been removed; we assume this is
3316                                  * the case.
3317                                  */
3318                                 result = dns_name_concatenate(name, origin,
3319                                                               foundname, NULL);
3320                                 if (result == ISC_R_SUCCESS) {
3321                                         if (nodep != NULL) {
3322                                                 new_reference(search->rbtdb,
3323                                                               node);
3324                                                 *nodep = node;
3325                                         }
3326                                         bind_rdataset(search->rbtdb, node,
3327                                                       found, search->now,
3328                                                       rdataset);
3329                                         if (foundsig != NULL)
3330                                                 bind_rdataset(search->rbtdb,
3331                                                               node,
3332                                                               foundsig,
3333                                                               search->now,
3334                                                               sigrdataset);
3335                                 }
3336                         } else if (found == NULL && foundsig == NULL) {
3337                                 /*
3338                                  * This node is active, but has no NSEC or
3339                                  * RRSIG NSEC.  That means it's glue or
3340                                  * other obscured zone data that isn't
3341                                  * relevant for our search.  Treat the
3342                                  * node as if it were empty and keep looking.
3343                                  */
3344                                 empty_node = ISC_TRUE;
3345                                 result = dns_rbtnodechain_prev(&search->chain,
3346                                                                NULL, NULL);
3347                         } else {
3348                                 /*
3349                                  * We found an active node, but either the
3350                                  * NSEC or the RRSIG NSEC is missing.  This
3351                                  * shouldn't happen.
3352                                  */
3353                                 result = DNS_R_BADDB;
3354                         }
3355                 } else {
3356                         /*
3357                          * This node isn't active.  We've got to keep
3358                          * looking.
3359                          */
3360                         result = dns_rbtnodechain_prev(&search->chain, NULL,
3361                                                        NULL);
3362                 }
3363                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3364                             isc_rwlocktype_read);
3365         } while (empty_node && result == ISC_R_SUCCESS);
3366
3367         if (result == ISC_R_NOMORE && wraps) {
3368                 result = dns_rbtnodechain_last(&search->chain, tree,
3369                                                NULL, NULL);
3370                 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3371                         wraps = ISC_FALSE;
3372                         goto again;
3373                 }
3374         }
3375
3376         /*
3377          * If the result is ISC_R_NOMORE, then we got to the beginning of
3378          * the database and didn't find a NSEC record.  This shouldn't
3379          * happen.
3380          */
3381         if (result == ISC_R_NOMORE)
3382                 result = DNS_R_BADDB;
3383
3384         return (result);
3385 }
3386
3387 static isc_result_t
3388 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3389           dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3390           dns_dbnode_t **nodep, dns_name_t *foundname,
3391           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3392 {
3393         dns_rbtnode_t *node = NULL;
3394         isc_result_t result;
3395         rbtdb_search_t search;
3396         isc_boolean_t cname_ok = ISC_TRUE;
3397         isc_boolean_t close_version = ISC_FALSE;
3398         isc_boolean_t maybe_zonecut = ISC_FALSE;
3399         isc_boolean_t at_zonecut = ISC_FALSE;
3400         isc_boolean_t wild;
3401         isc_boolean_t empty_node;
3402         rdatasetheader_t *header, *header_next, *found, *nsecheader;
3403         rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3404         rbtdb_rdatatype_t sigtype;
3405         isc_boolean_t active;
3406         dns_rbtnodechain_t chain;
3407         nodelock_t *lock;
3408         dns_rbt_t *tree;
3409
3410         search.rbtdb = (dns_rbtdb_t *)db;
3411
3412         REQUIRE(VALID_RBTDB(search.rbtdb));
3413
3414         /*
3415          * We don't care about 'now'.
3416          */
3417         UNUSED(now);
3418
3419         /*
3420          * If the caller didn't supply a version, attach to the current
3421          * version.
3422          */
3423         if (version == NULL) {
3424                 currentversion(db, &version);
3425                 close_version = ISC_TRUE;
3426         }
3427
3428         search.rbtversion = version;
3429         search.serial = search.rbtversion->serial;
3430         search.options = options;
3431         search.copy_name = ISC_FALSE;
3432         search.need_cleanup = ISC_FALSE;
3433         search.wild = ISC_FALSE;
3434         search.zonecut = NULL;
3435         dns_fixedname_init(&search.zonecut_name);
3436         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3437         search.now = 0;
3438
3439         /*
3440          * 'wild' will be true iff. we've matched a wildcard.
3441          */
3442         wild = ISC_FALSE;
3443
3444         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3445
3446         /*
3447          * Search down from the root of the tree.  If, while going down, we
3448          * encounter a callback node, zone_zonecut_callback() will search the
3449          * rdatasets at the zone cut for active DNAME or NS rdatasets.
3450          */
3451         tree =  (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3452                                                          search.rbtdb->tree;
3453         result = dns_rbt_findnode(tree, name, foundname, &node,
3454                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
3455                                   zone_zonecut_callback, &search);
3456
3457         if (result == DNS_R_PARTIALMATCH) {
3458         partial_match:
3459                 if (search.zonecut != NULL) {
3460                     result = setup_delegation(&search, nodep, foundname,
3461                                               rdataset, sigrdataset);
3462                     goto tree_exit;
3463                 }
3464
3465                 if (search.wild) {
3466                         /*
3467                          * At least one of the levels in the search chain
3468                          * potentially has a wildcard.  For each such level,
3469                          * we must see if there's a matching wildcard active
3470                          * in the current version.
3471                          */
3472                         result = find_wildcard(&search, &node, name);
3473                         if (result == ISC_R_SUCCESS) {
3474                                 result = dns_name_copy(name, foundname, NULL);
3475                                 if (result != ISC_R_SUCCESS)
3476                                         goto tree_exit;
3477                                 wild = ISC_TRUE;
3478                                 goto found;
3479                         }
3480                         else if (result != ISC_R_NOTFOUND)
3481                                 goto tree_exit;
3482                 }
3483
3484                 chain = search.chain;
3485                 active = activeempty(&search, &chain, name);
3486
3487                 /*
3488                  * If we're here, then the name does not exist, is not
3489                  * beneath a zonecut, and there's no matching wildcard.
3490                  */
3491                 if ((search.rbtversion->secure == dns_db_secure &&
3492                      !search.rbtversion->havensec3) ||
3493                     (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3494                     (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3495                 {
3496                         result = find_closest_nsec(&search, nodep, foundname,
3497                                                    rdataset, sigrdataset, tree,
3498                                                    search.rbtversion->secure);
3499                         if (result == ISC_R_SUCCESS)
3500                                 result = active ? DNS_R_EMPTYNAME :
3501                                                   DNS_R_NXDOMAIN;
3502                 } else
3503                         result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3504                 goto tree_exit;
3505         } else if (result != ISC_R_SUCCESS)
3506                 goto tree_exit;
3507
3508  found:
3509         /*
3510          * We have found a node whose name is the desired name, or we
3511          * have matched a wildcard.
3512          */
3513
3514         if (search.zonecut != NULL) {
3515                 /*
3516                  * If we're beneath a zone cut, we don't want to look for
3517                  * CNAMEs because they're not legitimate zone glue.
3518                  */
3519                 cname_ok = ISC_FALSE;
3520         } else {
3521                 /*
3522                  * The node may be a zone cut itself.  If it might be one,
3523                  * make sure we check for it later.
3524                  *
3525                  * DS records live above the zone cut in ordinary zone so
3526                  * we want to ignore any referral.
3527                  *
3528                  * Stub zones don't have anything "above" the delgation so
3529                  * we always return a referral.
3530                  */
3531                 if (node->find_callback &&
3532                     ((node != search.rbtdb->origin_node &&
3533                       !dns_rdatatype_atparent(type)) ||
3534                      IS_STUB(search.rbtdb)))
3535                         maybe_zonecut = ISC_TRUE;
3536         }
3537
3538         /*
3539          * Certain DNSSEC types are not subject to CNAME matching
3540          * (RFC4035, section 2.5 and RFC3007).
3541          *
3542          * We don't check for RRSIG, because we don't store RRSIG records
3543          * directly.
3544          */
3545         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3546                 cname_ok = ISC_FALSE;
3547
3548         /*
3549          * We now go looking for rdata...
3550          */
3551
3552         lock = &search.rbtdb->node_locks[node->locknum].lock;
3553         NODE_LOCK(lock, isc_rwlocktype_read);
3554
3555         found = NULL;
3556         foundsig = NULL;
3557         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3558         nsecheader = NULL;
3559         nsecsig = NULL;
3560         cnamesig = NULL;
3561         empty_node = ISC_TRUE;
3562         for (header = node->data; header != NULL; header = header_next) {
3563                 header_next = header->next;
3564                 /*
3565                  * Look for an active, extant rdataset.
3566                  */
3567                 do {
3568                         if (header->serial <= search.serial &&
3569                             !IGNORE(header)) {
3570                                 /*
3571                                  * Is this a "this rdataset doesn't
3572                                  * exist" record?
3573                                  */
3574                                 if (NONEXISTENT(header))
3575                                         header = NULL;
3576                                 break;
3577                         } else
3578                                 header = header->down;
3579                 } while (header != NULL);
3580                 if (header != NULL) {
3581                         /*
3582                          * We now know that there is at least one active
3583                          * rdataset at this node.
3584                          */
3585                         empty_node = ISC_FALSE;
3586
3587                         /*
3588                          * Do special zone cut handling, if requested.
3589                          */
3590                         if (maybe_zonecut &&
3591                             header->type == dns_rdatatype_ns) {
3592                                 /*
3593                                  * We increment the reference count on node to
3594                                  * ensure that search->zonecut_rdataset will
3595                                  * still be valid later.
3596                                  */
3597                                 new_reference(search.rbtdb, node);
3598                                 search.zonecut = node;
3599                                 search.zonecut_rdataset = header;
3600                                 search.zonecut_sigrdataset = NULL;
3601                                 search.need_cleanup = ISC_TRUE;
3602                                 maybe_zonecut = ISC_FALSE;
3603                                 at_zonecut = ISC_TRUE;
3604                                 /*
3605                                  * It is not clear if KEY should still be
3606                                  * allowed at the parent side of the zone
3607                                  * cut or not.  It is needed for RFC3007
3608                                  * validated updates.
3609                                  */
3610                                 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3611                                     && type != dns_rdatatype_nsec
3612                                     && type != dns_rdatatype_key) {
3613                                         /*
3614                                          * Glue is not OK, but any answer we
3615                                          * could return would be glue.  Return
3616                                          * the delegation.
3617                                          */
3618                                         found = NULL;
3619                                         break;
3620                                 }
3621                                 if (found != NULL && foundsig != NULL)
3622                                         break;
3623                         }
3624
3625
3626                         /*
3627                          * If the NSEC3 record doesn't match the chain
3628                          * we are using behave as if it isn't here.
3629                          */
3630                         if (header->type == dns_rdatatype_nsec3 &&
3631                            !matchparams(header, &search)) {
3632                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3633                                 goto partial_match;
3634                         }
3635                         /*
3636                          * If we found a type we were looking for,
3637                          * remember it.
3638                          */
3639                         if (header->type == type ||
3640                             type == dns_rdatatype_any ||
3641                             (header->type == dns_rdatatype_cname &&
3642                              cname_ok)) {
3643                                 /*
3644                                  * We've found the answer!
3645                                  */
3646                                 found = header;
3647                                 if (header->type == dns_rdatatype_cname &&
3648                                     cname_ok) {
3649                                         /*
3650                                          * We may be finding a CNAME instead
3651                                          * of the desired type.
3652                                          *
3653                                          * If we've already got the CNAME RRSIG,
3654                                          * use it, otherwise change sigtype
3655                                          * so that we find it.
3656                                          */
3657                                         if (cnamesig != NULL)
3658                                                 foundsig = cnamesig;
3659                                         else
3660                                                 sigtype =
3661                                                     RBTDB_RDATATYPE_SIGCNAME;
3662                                 }
3663                                 /*
3664                                  * If we've got all we need, end the search.
3665                                  */
3666                                 if (!maybe_zonecut && foundsig != NULL)
3667                                         break;
3668                         } else if (header->type == sigtype) {
3669                                 /*
3670                                  * We've found the RRSIG rdataset for our
3671                                  * target type.  Remember it.
3672                                  */
3673                                 foundsig = header;
3674                                 /*
3675                                  * If we've got all we need, end the search.
3676                                  */
3677                                 if (!maybe_zonecut && found != NULL)
3678                                         break;
3679                         } else if (header->type == dns_rdatatype_nsec &&
3680                                    !search.rbtversion->havensec3) {
3681                                 /*
3682                                  * Remember a NSEC rdataset even if we're
3683                                  * not specifically looking for it, because
3684                                  * we might need it later.
3685                                  */
3686                                 nsecheader = header;
3687                         } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3688                                    !search.rbtversion->havensec3) {
3689                                 /*
3690                                  * If we need the NSEC rdataset, we'll also
3691                                  * need its signature.
3692                                  */
3693                                 nsecsig = header;
3694                         } else if (cname_ok &&
3695                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
3696                                 /*
3697                                  * If we get a CNAME match, we'll also need
3698                                  * its signature.
3699                                  */
3700                                 cnamesig = header;
3701                         }
3702                 }
3703         }
3704
3705         if (empty_node) {
3706                 /*
3707                  * We have an exact match for the name, but there are no
3708                  * active rdatasets in the desired version.  That means that
3709                  * this node doesn't exist in the desired version, and that
3710                  * we really have a partial match.
3711                  */
3712                 if (!wild) {
3713                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3714                         goto partial_match;
3715                 }
3716         }
3717
3718         /*
3719          * If we didn't find what we were looking for...
3720          */
3721         if (found == NULL) {
3722                 if (search.zonecut != NULL) {
3723                         /*
3724                          * We were trying to find glue at a node beneath a
3725                          * zone cut, but didn't.
3726                          *
3727                          * Return the delegation.
3728                          */
3729                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3730                         result = setup_delegation(&search, nodep, foundname,
3731                                                   rdataset, sigrdataset);
3732                         goto tree_exit;
3733                 }
3734                 /*
3735                  * The desired type doesn't exist.
3736                  */
3737                 result = DNS_R_NXRRSET;
3738                 if (search.rbtversion->secure == dns_db_secure &&
3739                     !search.rbtversion->havensec3 &&
3740                     (nsecheader == NULL || nsecsig == NULL)) {
3741                         /*
3742                          * The zone is secure but there's no NSEC,
3743                          * or the NSEC has no signature!
3744                          */
3745                         if (!wild) {
3746                                 result = DNS_R_BADDB;
3747                                 goto node_exit;
3748                         }
3749
3750                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3751                         result = find_closest_nsec(&search, nodep, foundname,
3752                                                    rdataset, sigrdataset,
3753                                                    search.rbtdb->tree,
3754                                                    search.rbtversion->secure);
3755                         if (result == ISC_R_SUCCESS)
3756                                 result = DNS_R_EMPTYWILD;
3757                         goto tree_exit;
3758                 }
3759                 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3760                     nsecheader == NULL)
3761                 {
3762                         /*
3763                          * There's no NSEC record, and we were told
3764                          * to find one.
3765                          */
3766                         result = DNS_R_BADDB;
3767                         goto node_exit;
3768                 }
3769                 if (nodep != NULL) {
3770                         new_reference(search.rbtdb, node);
3771                         *nodep = node;
3772                 }
3773                 if ((search.rbtversion->secure == dns_db_secure &&
3774                      !search.rbtversion->havensec3) ||
3775                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
3776                 {
3777                         bind_rdataset(search.rbtdb, node, nsecheader,
3778                                       0, rdataset);
3779                         if (nsecsig != NULL)
3780                                 bind_rdataset(search.rbtdb, node,
3781                                               nsecsig, 0, sigrdataset);
3782                 }
3783                 if (wild)
3784                         foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3785                 goto node_exit;
3786         }
3787
3788         /*
3789          * We found what we were looking for, or we found a CNAME.
3790          */
3791
3792         if (type != found->type &&
3793             type != dns_rdatatype_any &&
3794             found->type == dns_rdatatype_cname) {
3795                 /*
3796                  * We weren't doing an ANY query and we found a CNAME instead
3797                  * of the type we were looking for, so we need to indicate
3798                  * that result to the caller.
3799                  */
3800                 result = DNS_R_CNAME;
3801         } else if (search.zonecut != NULL) {
3802                 /*
3803                  * If we're beneath a zone cut, we must indicate that the
3804                  * result is glue, unless we're actually at the zone cut
3805                  * and the type is NSEC or KEY.
3806                  */
3807                 if (search.zonecut == node) {
3808                         /*
3809                          * It is not clear if KEY should still be
3810                          * allowed at the parent side of the zone
3811                          * cut or not.  It is needed for RFC3007
3812                          * validated updates.
3813                          */
3814                         if (type == dns_rdatatype_nsec ||
3815                             type == dns_rdatatype_nsec3 ||
3816                             type == dns_rdatatype_key)
3817                                 result = ISC_R_SUCCESS;
3818                         else if (type == dns_rdatatype_any)
3819                                 result = DNS_R_ZONECUT;
3820                         else
3821                                 result = DNS_R_GLUE;
3822                 } else
3823                         result = DNS_R_GLUE;
3824                 /*
3825                  * We might have found data that isn't glue, but was occluded
3826                  * by a dynamic update.  If the caller cares about this, they
3827                  * will have told us to validate glue.
3828                  *
3829                  * XXX We should cache the glue validity state!
3830                  */
3831                 if (result == DNS_R_GLUE &&
3832                     (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
3833                     !valid_glue(&search, foundname, type, node)) {
3834                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3835                         result = setup_delegation(&search, nodep, foundname,
3836                                                   rdataset, sigrdataset);
3837                     goto tree_exit;
3838                 }
3839         } else {
3840                 /*
3841                  * An ordinary successful query!
3842                  */
3843                 result = ISC_R_SUCCESS;
3844         }
3845
3846         if (nodep != NULL) {
3847                 if (!at_zonecut)
3848                         new_reference(search.rbtdb, node);
3849                 else
3850                         search.need_cleanup = ISC_FALSE;
3851                 *nodep = node;
3852         }
3853
3854         if (type != dns_rdatatype_any) {
3855                 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
3856                 if (foundsig != NULL)
3857                         bind_rdataset(search.rbtdb, node, foundsig, 0,
3858                                       sigrdataset);
3859         }
3860
3861         if (wild)
3862                 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3863
3864  node_exit:
3865         NODE_UNLOCK(lock, isc_rwlocktype_read);
3866
3867  tree_exit:
3868         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3869
3870         /*
3871          * If we found a zonecut but aren't going to use it, we have to
3872          * let go of it.
3873          */
3874         if (search.need_cleanup) {
3875                 node = search.zonecut;
3876                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3877
3878                 NODE_LOCK(lock, isc_rwlocktype_read);
3879                 decrement_reference(search.rbtdb, node, 0,
3880                                     isc_rwlocktype_read, isc_rwlocktype_none,
3881                                     ISC_FALSE);
3882                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3883         }
3884
3885         if (close_version)
3886                 closeversion(db, &version, ISC_FALSE);
3887
3888         dns_rbtnodechain_reset(&search.chain);
3889
3890         return (result);
3891 }
3892
3893 static isc_result_t
3894 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
3895                  isc_stdtime_t now, dns_dbnode_t **nodep,
3896                  dns_name_t *foundname,
3897                  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3898 {
3899         UNUSED(db);
3900         UNUSED(name);
3901         UNUSED(options);
3902         UNUSED(now);
3903         UNUSED(nodep);
3904         UNUSED(foundname);
3905         UNUSED(rdataset);
3906         UNUSED(sigrdataset);
3907
3908         FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
3909
3910         return (ISC_R_NOTIMPLEMENTED);
3911 }
3912
3913 static isc_result_t
3914 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
3915         rbtdb_search_t *search = arg;
3916         rdatasetheader_t *header, *header_prev, *header_next;
3917         rdatasetheader_t *dname_header, *sigdname_header;
3918         isc_result_t result;
3919         nodelock_t *lock;
3920         isc_rwlocktype_t locktype;
3921
3922         /* XXX comment */
3923
3924         REQUIRE(search->zonecut == NULL);
3925
3926         /*
3927          * Keep compiler silent.
3928          */
3929         UNUSED(name);
3930
3931         lock = &(search->rbtdb->node_locks[node->locknum].lock);
3932         locktype = isc_rwlocktype_read;
3933         NODE_LOCK(lock, locktype);
3934
3935         /*
3936          * Look for a DNAME or RRSIG DNAME rdataset.
3937          */
3938         dname_header = NULL;
3939         sigdname_header = NULL;
3940         header_prev = NULL;
3941         for (header = node->data; header != NULL; header = header_next) {
3942                 header_next = header->next;
3943                 if (header->rdh_ttl <= search->now) {
3944                         /*
3945                          * This rdataset is stale.  If no one else is
3946                          * using the node, we can clean it up right
3947                          * now, otherwise we mark it as stale, and
3948                          * the node as dirty, so it will get cleaned
3949                          * up later.
3950                          */
3951                         if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
3952                             (locktype == isc_rwlocktype_write ||
3953                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3954                                 /*
3955                                  * We update the node's status only when we
3956                                  * can get write access; otherwise, we leave
3957                                  * others to this work.  Periodical cleaning
3958                                  * will eventually take the job as the last
3959                                  * resort.
3960                                  * We won't downgrade the lock, since other
3961                                  * rdatasets are probably stale, too.
3962                                  */
3963                                 locktype = isc_rwlocktype_write;
3964
3965                                 if (dns_rbtnode_refcurrent(node) == 0) {
3966                                         isc_mem_t *mctx;
3967
3968                                         /*
3969                                          * header->down can be non-NULL if the
3970                                          * refcount has just decremented to 0
3971                                          * but decrement_reference() has not
3972                                          * performed clean_cache_node(), in
3973                                          * which case we need to purge the
3974                                          * stale headers first.
3975                                          */
3976                                         mctx = search->rbtdb->common.mctx;
3977                                         clean_stale_headers(search->rbtdb,
3978                                                             mctx,
3979                                                             header);
3980                                         if (header_prev != NULL)
3981                                                 header_prev->next =
3982                                                         header->next;
3983                                         else
3984                                                 node->data = header->next;
3985                                         free_rdataset(search->rbtdb, mctx,
3986                                                       header);
3987                                 } else {
3988                                         header->attributes |=
3989                                                 RDATASET_ATTR_STALE;
3990                                         node->dirty = 1;
3991                                         header_prev = header;
3992                                 }
3993                         } else
3994                                 header_prev = header;
3995                 } else if (header->type == dns_rdatatype_dname &&
3996                            EXISTS(header)) {
3997                         dname_header = header;
3998                         header_prev = header;
3999                 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
4000                          EXISTS(header)) {
4001                         sigdname_header = header;
4002                         header_prev = header;
4003                 } else
4004                         header_prev = header;
4005         }
4006
4007         if (dname_header != NULL &&
4008             (!DNS_TRUST_PENDING(dname_header->trust) ||
4009              (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4010                 /*
4011                  * We increment the reference count on node to ensure that
4012                  * search->zonecut_rdataset will still be valid later.
4013                  */
4014                 new_reference(search->rbtdb, node);
4015                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4016                 search->zonecut = node;
4017                 search->zonecut_rdataset = dname_header;
4018                 search->zonecut_sigrdataset = sigdname_header;
4019                 search->need_cleanup = ISC_TRUE;
4020                 result = DNS_R_PARTIALMATCH;
4021         } else
4022                 result = DNS_R_CONTINUE;
4023
4024         NODE_UNLOCK(lock, locktype);
4025
4026         return (result);
4027 }
4028
4029 static inline isc_result_t
4030 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4031                      dns_dbnode_t **nodep, dns_name_t *foundname,
4032                      dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4033 {
4034         unsigned int i;
4035         dns_rbtnode_t *level_node;
4036         rdatasetheader_t *header, *header_prev, *header_next;
4037         rdatasetheader_t *found, *foundsig;
4038         isc_result_t result = ISC_R_NOTFOUND;
4039         dns_name_t name;
4040         dns_rbtdb_t *rbtdb;
4041         isc_boolean_t done;
4042         nodelock_t *lock;
4043         isc_rwlocktype_t locktype;
4044
4045         /*
4046          * Caller must be holding the tree lock.
4047          */
4048
4049         rbtdb = search->rbtdb;
4050         i = search->chain.level_matches;
4051         done = ISC_FALSE;
4052         do {
4053                 locktype = isc_rwlocktype_read;
4054                 lock = &rbtdb->node_locks[node->locknum].lock;
4055                 NODE_LOCK(lock, locktype);
4056
4057                 /*
4058                  * Look for NS and RRSIG NS rdatasets.
4059                  */
4060                 found = NULL;
4061                 foundsig = NULL;
4062                 header_prev = NULL;
4063                 for (header = node->data;
4064                      header != NULL;
4065                      header = header_next) {
4066                         header_next = header->next;
4067                         if (header->rdh_ttl <= search->now) {
4068                                 /*
4069                                  * This rdataset is stale.  If no one else is
4070                                  * using the node, we can clean it up right
4071                                  * now, otherwise we mark it as stale, and
4072                                  * the node as dirty, so it will get cleaned
4073                                  * up later.
4074                                  */
4075                                 if ((header->rdh_ttl <= search->now -
4076                                                     RBTDB_VIRTUAL) &&
4077                                     (locktype == isc_rwlocktype_write ||
4078                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4079                                         /*
4080                                          * We update the node's status only
4081                                          * when we can get write access.
4082                                          */
4083                                         locktype = isc_rwlocktype_write;
4084
4085                                         if (dns_rbtnode_refcurrent(node)
4086                                             == 0) {
4087                                                 isc_mem_t *m;
4088
4089                                                 m = search->rbtdb->common.mctx;
4090                                                 clean_stale_headers(
4091                                                         search->rbtdb,
4092                                                         m, header);
4093                                                 if (header_prev != NULL)
4094                                                         header_prev->next =
4095                                                                 header->next;
4096                                                 else
4097                                                         node->data =
4098                                                                 header->next;
4099                                                 free_rdataset(rbtdb, m,
4100                                                               header);
4101                                         } else {
4102                                                 header->attributes |=
4103                                                         RDATASET_ATTR_STALE;
4104                                                 node->dirty = 1;
4105                                                 header_prev = header;
4106                                         }
4107                                 } else
4108                                         header_prev = header;
4109                         } else if (EXISTS(header)) {
4110                                 /*
4111                                  * We've found an extant rdataset.  See if
4112                                  * we're interested in it.
4113                                  */
4114                                 if (header->type == dns_rdatatype_ns) {
4115                                         found = header;
4116                                         if (foundsig != NULL)
4117                                                 break;
4118                                 } else if (header->type ==
4119                                            RBTDB_RDATATYPE_SIGNS) {
4120                                         foundsig = header;
4121                                         if (found != NULL)
4122                                                 break;
4123                                 }
4124                                 header_prev = header;
4125                         } else
4126                                 header_prev = header;
4127                 }
4128
4129                 if (found != NULL) {
4130                         /*
4131                          * If we have to set foundname, we do it before
4132                          * anything else.  If we were to set foundname after
4133                          * we had set nodep or bound the rdataset, then we'd
4134                          * have to undo that work if dns_name_concatenate()
4135                          * failed.  By setting foundname first, there's
4136                          * nothing to undo if we have trouble.
4137                          */
4138                         if (foundname != NULL) {
4139                                 dns_name_init(&name, NULL);
4140                                 dns_rbt_namefromnode(node, &name);
4141                                 result = dns_name_copy(&name, foundname, NULL);
4142                                 while (result == ISC_R_SUCCESS && i > 0) {
4143                                         i--;
4144                                         level_node = search->chain.levels[i];
4145                                         dns_name_init(&name, NULL);
4146                                         dns_rbt_namefromnode(level_node,
4147                                                              &name);
4148                                         result =
4149                                                 dns_name_concatenate(foundname,
4150                                                                      &name,
4151                                                                      foundname,
4152                                                                      NULL);
4153                                 }
4154                                 if (result != ISC_R_SUCCESS) {
4155                                         *nodep = NULL;
4156                                         goto node_exit;
4157                                 }
4158                         }
4159                         result = DNS_R_DELEGATION;
4160                         if (nodep != NULL) {
4161                                 new_reference(search->rbtdb, node);
4162                                 *nodep = node;
4163                         }
4164                         bind_rdataset(search->rbtdb, node, found, search->now,
4165                                       rdataset);
4166                         if (foundsig != NULL)
4167                                 bind_rdataset(search->rbtdb, node, foundsig,
4168                                               search->now, sigrdataset);
4169                         if (need_headerupdate(found, search->now) ||
4170                             (foundsig != NULL &&
4171                              need_headerupdate(foundsig, search->now))) {
4172                                 if (locktype != isc_rwlocktype_write) {
4173                                         NODE_UNLOCK(lock, locktype);
4174                                         NODE_LOCK(lock, isc_rwlocktype_write);
4175                                         locktype = isc_rwlocktype_write;
4176                                 }
4177                                 if (need_headerupdate(found, search->now))
4178                                         update_header(search->rbtdb, found,
4179                                                       search->now);
4180                                 if (foundsig != NULL &&
4181                                     need_headerupdate(foundsig, search->now)) {
4182                                         update_header(search->rbtdb, foundsig,
4183                                                       search->now);
4184                                 }
4185                         }
4186                 }
4187
4188         node_exit:
4189                 NODE_UNLOCK(lock, locktype);
4190
4191                 if (found == NULL && i > 0) {
4192                         i--;
4193                         node = search->chain.levels[i];
4194                 } else
4195                         done = ISC_TRUE;
4196
4197         } while (!done);
4198
4199         return (result);
4200 }
4201
/*
 * find_coveringnsec: look for an NSEC rdataset (and its RRSIG) to hand back
 * as a "covering NSEC" answer, starting at the node the search chain
 * currently points to.
 *
 * Each pass of the outer loop fetches the chain's current node, read-locks
 * it and walks its rdataset headers:
 *   - headers whose TTL is <= 'now' are skipped, and may be freed or marked
 *     stale on the way past;
 *   - nonexistent headers and headers whose base type is 0 are skipped;
 *   - anything else makes the node "non-empty"; NSEC and RRSIG(NSEC)
 *     headers are remembered.
 *
 * If an NSEC was seen, the node's full name is written to 'foundname', the
 * NSEC is bound to 'rdataset' (and the RRSIG, when one was seen, to
 * 'sigrdataset'), and a node reference is taken and stored in '*nodep'.
 * If the node had no active rdatasets at all, the chain is stepped to the
 * previous node and the loop repeats.
 *
 * Note that '*nodep' is written unconditionally when an NSEC is found, so
 * 'nodep' must not be NULL in this function.
 *
 * Returns:
 *   DNS_R_COVERINGNSEC  an NSEC was found and bound.
 *   ISC_R_NOTFOUND      a node with active data but no NSEC was reached.
 *   any other value     whatever dns_rbtnodechain_current(),
 *                       dns_rbtnodechain_prev() or dns_name_concatenate()
 *                       returned.
 */
4202 static isc_result_t
4203 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4204                   isc_stdtime_t now, dns_name_t *foundname,
4205                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4206 {
4207         dns_rbtnode_t *node;
4208         rdatasetheader_t *header, *header_next, *header_prev;
4209         rdatasetheader_t *found, *foundsig;
4210         isc_boolean_t empty_node;
4211         isc_result_t result;
4212         dns_fixedname_t fname, forigin;
4213         dns_name_t *name, *origin;
4214         rbtdb_rdatatype_t matchtype, sigmatchtype;
4215         nodelock_t *lock;
4216         isc_rwlocktype_t locktype;
4217
             /* The two header types we are interested in: NSEC and RRSIG(NSEC). */
4218         matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4219         sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4220                                              dns_rdatatype_nsec);
4221
4222         do {
4223                 node = NULL;
4224                 dns_fixedname_init(&fname);
4225                 name = dns_fixedname_name(&fname);
4226                 dns_fixedname_init(&forigin);
4227                 origin = dns_fixedname_name(&forigin);
4228                 result = dns_rbtnodechain_current(&search->chain, name,
4229                                                   origin, &node);
4230                 if (result != ISC_R_SUCCESS)
4231                         return (result);
4232                 locktype = isc_rwlocktype_read;
4233                 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4234                 NODE_LOCK(lock, locktype);
4235                 found = NULL;
4236                 foundsig = NULL;
4237                 empty_node = ISC_TRUE;
4238                 header_prev = NULL;
4239                 for (header = node->data;
4240                      header != NULL;
4241                      header = header_next) {
                             /*
                              * Save the successor first: 'header' may be
                              * unlinked and freed below.
                              */
4242                         header_next = header->next;
4243                         if (header->rdh_ttl <= now) {
4244                                 /*
4245                                  * This rdataset is stale.  If no one else is
4246                                  * using the node, we can clean it up right
4247                                  * now, otherwise we mark it as stale, and the
4248                                  * node as dirty, so it will get cleaned up
4249                                  * later.
4250                                  */
4251                                 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4252                                     (locktype == isc_rwlocktype_write ||
4253                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4254                                         /*
4255                                          * We update the node's status only
4256                                          * when we can get write access.
4257                                          */
4258                                         locktype = isc_rwlocktype_write;
4259
4260                                         if (dns_rbtnode_refcurrent(node)
4261                                             == 0) {
4262                                                 isc_mem_t *m;
4263
4264                                                 m = search->rbtdb->common.mctx;
4265                                                 clean_stale_headers(
4266                                                         search->rbtdb,
4267                                                         m, header);
                                                     /*
                                                      * Unlink 'header' from the
                                                      * node's list; header_prev
                                                      * is left unchanged because
                                                      * 'header' is going away.
                                                      */
4268                                                 if (header_prev != NULL)
4269                                                         header_prev->next =
4270                                                                 header->next;
4271                                                 else
4272                                                         node->data = header->next;
4273                                                 free_rdataset(search->rbtdb, m,
4274                                                               header);
4275                                         } else {
4276                                                 header->attributes |=
4277                                                         RDATASET_ATTR_STALE;
4278                                                 node->dirty = 1;
4279                                                 header_prev = header;
4280                                         }
4281                                 } else
4282                                         header_prev = header;
4283                                 continue;
4284                         }
                             /*
                              * Nonexistent headers and headers with base type 0
                              * do not count as active data for this search.
                              */
4285                         if (NONEXISTENT(header) ||
4286                             RBTDB_RDATATYPE_BASE(header->type) == 0) {
4287                                 header_prev = header;
4288                                 continue;
4289                         }
4290                         empty_node = ISC_FALSE;
4291                         if (header->type == matchtype)
4292                                 found = header;
4293                         else if (header->type == sigmatchtype)
4294                                 foundsig = header;
4295                         header_prev = header;
4296                 }
4297                 if (found != NULL) {
                             /* Build the node's full name from name + origin. */
4298                         result = dns_name_concatenate(name, origin,
4299                                                       foundname, NULL);
4300                         if (result != ISC_R_SUCCESS)
4301                                 goto unlock_node;
4302                         bind_rdataset(search->rbtdb, node, found,
4303                                       now, rdataset);
4304                         if (foundsig != NULL)
4305                                 bind_rdataset(search->rbtdb, node, foundsig,
4306                                               now, sigrdataset);
4307                         new_reference(search->rbtdb, node);
4308                         *nodep = node;
4309                         result = DNS_R_COVERINGNSEC;
4310                 } else if (!empty_node) {
                             /* Active data here but no NSEC: stop searching. */
4311                         result = ISC_R_NOTFOUND;
4312                 } else
                             /* Nothing active at this node: try the previous one. */
4313                         result = dns_rbtnodechain_prev(&search->chain, NULL,
4314                                                        NULL);
4315  unlock_node:
4316                 NODE_UNLOCK(lock, locktype);
4317         } while (empty_node && result == ISC_R_SUCCESS);
4318         return (result);
4319 }
4320
/*
 * cache_find: look up 'name'/'type' in a cache database.
 *
 * 'version' must be NULL.  If 'now' is 0 the current time is used.
 *
 * The tree is searched under the tree read lock.
 *
 * On a partial match:
 *   - if DNS_DBFIND_COVERINGNSEC is set in 'options', find_coveringnsec()
 *     is tried first and its DNS_R_COVERINGNSEC result is returned;
 *   - otherwise, if the search recorded a zone cut (search.zonecut), the
 *     result of setup_delegation() is returned;
 *   - otherwise the result of find_deepest_zonecut() is returned.
 * Any other non-success result of dns_rbt_findnode() is returned as is.
 *
 * On an exact match the node's rdataset headers are scanned under the node
 * lock.  Headers whose TTL is <= 'now' are skipped (and freed or marked
 * stale when possible).  Among the remaining existing headers the scan
 * remembers the answer (the requested type, any type with a non-zero base
 * for ANY queries, a CNAME when CNAME matching is allowed, or a negative
 * cache entry), its RRSIG, and the NS rdataset plus its RRSIG.
 *
 * Returns:
 *   ISC_R_SUCCESS           the requested data was found.
 *   DNS_R_CNAME             a CNAME was found instead of 'type'.
 *   DNS_R_NCACHENXDOMAIN /
 *   DNS_R_NCACHENXRRSET     a negative cache entry was found.
 *   DNS_R_DELEGATION        no acceptable answer, but the node has an NS
 *                           rdataset, which is bound to 'rdataset'.
 *   DNS_R_COVERINGNSEC      see above.
 *   otherwise               the result of setup_delegation(),
 *                           find_deepest_zonecut() or dns_rbt_findnode().
 *
 * When 'nodep' is not NULL and an answer or delegation is found at the
 * node, a reference to the node is taken and stored in '*nodep'.
 */
4321 static isc_result_t
4322 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4323            dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4324            dns_dbnode_t **nodep, dns_name_t *foundname,
4325            dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4326 {
4327         dns_rbtnode_t *node = NULL;
4328         isc_result_t result;
4329         rbtdb_search_t search;
4330         isc_boolean_t cname_ok = ISC_TRUE;
4331         isc_boolean_t empty_node;
4332         nodelock_t *lock;
4333         isc_rwlocktype_t locktype;
4334         rdatasetheader_t *header, *header_prev, *header_next;
4335         rdatasetheader_t *found, *nsheader;
4336         rdatasetheader_t *foundsig, *nssig, *cnamesig;
4337         rdatasetheader_t *update, *updatesig;
4338         rbtdb_rdatatype_t sigtype, negtype;
4339
4340         UNUSED(version);
4341
4342         search.rbtdb = (dns_rbtdb_t *)db;
4343
4344         REQUIRE(VALID_RBTDB(search.rbtdb));
4345         REQUIRE(version == NULL);
4346
4347         if (now == 0)
4348                 isc_stdtime_get(&now);
4349
4350         search.rbtversion = NULL;
4351         search.serial = 1;
4352         search.options = options;
4353         search.copy_name = ISC_FALSE;
4354         search.need_cleanup = ISC_FALSE;
4355         search.wild = ISC_FALSE;
4356         search.zonecut = NULL;
4357         dns_fixedname_init(&search.zonecut_name);
4358         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4359         search.now = now;
4360         update = NULL;
4361         updatesig = NULL;
4362
4363         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4364
4365         /*
4366          * Search down from the root of the tree.  If, while going down, we
4367          * encounter a callback node, cache_zonecut_callback() will search the
4368          * rdatasets at the zone cut for a DNAME rdataset.
4369          */
4370         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4371                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
4372                                   cache_zonecut_callback, &search);
4373
4374         if (result == DNS_R_PARTIALMATCH) {
4375                 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4376                         result = find_coveringnsec(&search, nodep, now,
4377                                                    foundname, rdataset,
4378                                                    sigrdataset);
4379                         if (result == DNS_R_COVERINGNSEC)
4380                                 goto tree_exit;
4381                 }
4382                 if (search.zonecut != NULL) {
4383                     result = setup_delegation(&search, nodep, foundname,
4384                                               rdataset, sigrdataset);
4385                     goto tree_exit;
4386                 } else {
                     /*
                      * Also entered by goto from the exact-match code below
                      * when the node turns out to hold no usable answer.
                      */
4387                 find_ns:
4388                         result = find_deepest_zonecut(&search, node, nodep,
4389                                                       foundname, rdataset,
4390                                                       sigrdataset);
4391                         goto tree_exit;
4392                 }
4393         } else if (result != ISC_R_SUCCESS)
4394                 goto tree_exit;
4395
4396         /*
4397          * Certain DNSSEC types are not subject to CNAME matching
4398          * (RFC4035, section 2.5 and RFC3007).
4399          *
4400          * We don't check for RRSIG, because we don't store RRSIG records
4401          * directly.
4402          */
4403         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4404                 cname_ok = ISC_FALSE;
4405
4406         /*
4407          * We now go looking for rdata...
4408          */
4409
4410         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4411         locktype = isc_rwlocktype_read;
4412         NODE_LOCK(lock, locktype);
4413
4414         found = NULL;
4415         foundsig = NULL;
4416         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4417         negtype = RBTDB_RDATATYPE_VALUE(0, type);
4418         nsheader = NULL;
4419         nssig = NULL;
4420         cnamesig = NULL;
4421         empty_node = ISC_TRUE;
4422         header_prev = NULL;
4423         for (header = node->data; header != NULL; header = header_next) {
                     /*
                      * Save the successor first: 'header' may be unlinked and
                      * freed below.
                      */
4424                 header_next = header->next;
4425                 if (header->rdh_ttl <= now) {
4426                         /*
4427                          * This rdataset is stale.  If no one else is using the
4428                          * node, we can clean it up right now, otherwise we
4429                          * mark it as stale, and the node as dirty, so it will
4430                          * get cleaned up later.
4431                          */
4432                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4433                             (locktype == isc_rwlocktype_write ||
4434                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4435                                 /*
4436                                  * We update the node's status only when we
4437                                  * can get write access.
4438                                  */
4439                                 locktype = isc_rwlocktype_write;
4440
4441                                 if (dns_rbtnode_refcurrent(node) == 0) {
4442                                         isc_mem_t *mctx;
4443
4444                                         mctx = search.rbtdb->common.mctx;
4445                                         clean_stale_headers(search.rbtdb, mctx,
4446                                                             header);
4447                                         if (header_prev != NULL)
4448                                                 header_prev->next =
4449                                                         header->next;
4450                                         else
4451                                                 node->data = header->next;
4452                                         free_rdataset(search.rbtdb, mctx,
4453                                                       header);
4454                                 } else {
4455                                         header->attributes |=
4456                                                 RDATASET_ATTR_STALE;
4457                                         node->dirty = 1;
4458                                         header_prev = header;
4459                                 }
4460                         } else
4461                                 header_prev = header;
4462                 } else if (EXISTS(header)) {
4463                         /*
4464                          * We now know that there is at least one active
4465                          * non-stale rdataset at this node.
4466                          */
4467                         empty_node = ISC_FALSE;
4468
4469                         /*
4470                          * If we found a type we were looking for, remember
4471                          * it.
4472                          */
4473                         if (header->type == type ||
4474                             (type == dns_rdatatype_any &&
4475                              RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4476                             (cname_ok && header->type ==
4477                              dns_rdatatype_cname)) {
4478                                 /*
4479                                  * We've found the answer.
4480                                  */
4481                                 found = header;
4482                                 if (header->type == dns_rdatatype_cname &&
4483                                     cname_ok &&
4484                                     cnamesig != NULL) {
4485                                         /*
4486                                          * If we've already seen the CNAME
4487                                          * RRSIG during this scan, use it as
4488                                          * the signature for the answer.
4489                                          */
4495                                         foundsig = cnamesig;
4496                                 }
4497                         } else if (header->type == sigtype) {
4498                                 /*
4499                                  * We've found the RRSIG rdataset for our
4500                                  * target type.  Remember it.
4501                                  */
4502                                 foundsig = header;
4503                         } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4504                                    header->type == negtype) {
4505                                 /*
4506                                  * We've found a negative cache entry.
4507                                  */
4508                                 found = header;
4509                         } else if (header->type == dns_rdatatype_ns) {
4510                                 /*
4511                                  * Remember a NS rdataset even if we're
4512                                  * not specifically looking for it, because
4513                                  * we might need it later.
4514                                  */
4515                                 nsheader = header;
4516                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4517                                 /*
4518                                  * If we need the NS rdataset, we'll also
4519                                  * need its signature.
4520                                  */
4521                                 nssig = header;
4522                         } else if (cname_ok &&
4523                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
4524                                 /*
4525                                  * If we get a CNAME match, we'll also need
4526                                  * its signature.
4527                                  */
4528                                 cnamesig = header;
4529                         }
4530                         header_prev = header;
4531                 } else
4532                         header_prev = header;
4533         }
4534
4535         if (empty_node) {
4536                 /*
4537                  * We have an exact match for the name, but there are no
4538                  * extant rdatasets.  That means that this node doesn't
4539                  * meaningfully exist, and that we really have a partial match.
4540                  */
4541                 NODE_UNLOCK(lock, locktype);
4542                 goto find_ns;
4543         }
4544
4545         /*
4546          * If we didn't find what we were looking for...
4547          */
             /*
              * ...or what we found has a trust level (additional, glue or
              * pending) that the caller did not allow through 'options'.
              */
4548         if (found == NULL ||
4549             (DNS_TRUST_ADDITIONAL(found->trust) &&
4550              ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4551             (found->trust == dns_trust_glue &&
4552              ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4553             (DNS_TRUST_PENDING(found->trust) &&
4554              ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4555                 /*
4556                  * If there is an NS rdataset at this node, then this is the
4557                  * deepest zone cut.
4558                  */
4559                 if (nsheader != NULL) {
4560                         if (nodep != NULL) {
4561                                 new_reference(search.rbtdb, node);
4562                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4563                                 *nodep = node;
4564                         }
4565                         bind_rdataset(search.rbtdb, node, nsheader, search.now,
4566                                       rdataset);
4567                         if (need_headerupdate(nsheader, search.now))
4568                                 update = nsheader;
4569                         if (nssig != NULL) {
4570                                 bind_rdataset(search.rbtdb, node, nssig,
4571                                               search.now, sigrdataset);
4572                                 if (need_headerupdate(nssig, search.now))
4573                                         updatesig = nssig;
4574                         }
4575                         result = DNS_R_DELEGATION;
4576                         goto node_exit;
4577                 }
4578
4579                 /*
4580                  * Go find the deepest zone cut.
4581                  */
4582                 NODE_UNLOCK(lock, locktype);
4583                 goto find_ns;
4584         }
4585
4586         /*
4587          * We found what we were looking for, or we found a CNAME.
4588          */
4589
4590         if (nodep != NULL) {
4591                 new_reference(search.rbtdb, node);
4592                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4593                 *nodep = node;
4594         }
4595
4596         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
4597                 /*
4598                  * We found a negative cache entry.
4599                  */
4600                 if (NXDOMAIN(found))
4601                         result = DNS_R_NCACHENXDOMAIN;
4602                 else
4603                         result = DNS_R_NCACHENXRRSET;
4604         } else if (type != found->type &&
4605                    type != dns_rdatatype_any &&
4606                    found->type == dns_rdatatype_cname) {
4607                 /*
4608                  * We weren't doing an ANY query and we found a CNAME instead
4609                  * of the type we were looking for, so we need to indicate
4610                  * that result to the caller.
4611                  */
4612                 result = DNS_R_CNAME;
4613         } else {
4614                 /*
4615                  * An ordinary successful query!
4616                  */
4617                 result = ISC_R_SUCCESS;
4618         }
4619
             /*
              * Nothing is bound for a successful ANY query; negative cache
              * results are bound even when 'type' is ANY.
              */
4620         if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
4621             result == DNS_R_NCACHENXRRSET) {
4622                 bind_rdataset(search.rbtdb, node, found, search.now,
4623                               rdataset);
4624                 if (need_headerupdate(found, search.now))
4625                         update = found;
4626                 if (!NEGATIVE(found) && foundsig != NULL) {
4627                         bind_rdataset(search.rbtdb, node, foundsig, search.now,
4628                                       sigrdataset);
4629                         if (need_headerupdate(foundsig, search.now))
4630                                 updatesig = foundsig;
4631                 }
4632         }
4633
4634  node_exit:
             /*
              * update_header() is only called with the node write lock held.
              * The lock is dropped and re-taken rather than upgraded, so
              * need_headerupdate() is evaluated again afterwards.
              */
4635         if ((update != NULL || updatesig != NULL) &&
4636             locktype != isc_rwlocktype_write) {
4637                 NODE_UNLOCK(lock, locktype);
4638                 NODE_LOCK(lock, isc_rwlocktype_write);
4639                 locktype = isc_rwlocktype_write;
4640         }
4641         if (update != NULL && need_headerupdate(update, search.now))
4642                 update_header(search.rbtdb, update, search.now);
4643         if (updatesig != NULL && need_headerupdate(updatesig, search.now))
4644                 update_header(search.rbtdb, updatesig, search.now);
4645
4646         NODE_UNLOCK(lock, locktype);
4647
4648  tree_exit:
4649         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4650
4651         /*
4652          * If we found a zonecut but aren't going to use it, we have to
4653          * let go of it.
4654          */
4655         if (search.need_cleanup) {
4656                 node = search.zonecut;
4657                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4658
4659                 NODE_LOCK(lock, isc_rwlocktype_read);
4660                 decrement_reference(search.rbtdb, node, 0,
4661                                     isc_rwlocktype_read, isc_rwlocktype_none,
4662                                     ISC_FALSE);
4663                 NODE_UNLOCK(lock, isc_rwlocktype_read);
4664         }
4665
4666         dns_rbtnodechain_reset(&search.chain);
4667
4668         return (result);
4669 }
4670
/*
 * cache_findzonecut: find the NS rdataset for 'name' in a cache database.
 *
 * If 'now' is 0 the current time is used.  DNS_DBFIND_NOEXACT in 'options'
 * is passed on to the tree search as DNS_RBTFIND_NOEXACT.
 *
 * The tree is searched under the tree read lock.  On a partial match, or on
 * an exact match whose node holds no active NS rdataset, the answer comes
 * from find_deepest_zonecut().  Otherwise the node's NS rdataset is bound to
 * 'rdataset' and its RRSIG, when present, to 'sigrdataset'; when 'nodep' is
 * not NULL a node reference is taken and stored in '*nodep'.
 *
 * Returns ISC_R_SUCCESS when an NS rdataset was bound (a DNS_R_DELEGATION
 * result is mapped to ISC_R_SUCCESS before returning); otherwise the result
 * of dns_rbt_findnode() or find_deepest_zonecut().
 */
4671 static isc_result_t
4672 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4673                   isc_stdtime_t now, dns_dbnode_t **nodep,
4674                   dns_name_t *foundname,
4675                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4676 {
4677         dns_rbtnode_t *node = NULL;
4678         nodelock_t *lock;
4679         isc_result_t result;
4680         rbtdb_search_t search;
4681         rdatasetheader_t *header, *header_prev, *header_next;
4682         rdatasetheader_t *found, *foundsig;
4683         unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
4684         isc_rwlocktype_t locktype;
4685
4686         search.rbtdb = (dns_rbtdb_t *)db;
4687
4688         REQUIRE(VALID_RBTDB(search.rbtdb));
4689
4690         if (now == 0)
4691                 isc_stdtime_get(&now);
4692
4693         search.rbtversion = NULL;
4694         search.serial = 1;
4695         search.options = options;
4696         search.copy_name = ISC_FALSE;
4697         search.need_cleanup = ISC_FALSE;
4698         search.wild = ISC_FALSE;
4699         search.zonecut = NULL;
4700         dns_fixedname_init(&search.zonecut_name);
4701         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4702         search.now = now;
4703
4704         if ((options & DNS_DBFIND_NOEXACT) != 0)
4705                 rbtoptions |= DNS_RBTFIND_NOEXACT;
4706
4707         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4708
4709         /*
4710          * Search down from the root of the tree.
4711          */
4712         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4713                                   &search.chain, rbtoptions, NULL, &search);
4714
4715         if (result == DNS_R_PARTIALMATCH) {
             /*
              * Also entered by goto from below when the exactly-matching
              * node holds no active NS rdataset.
              */
4716         find_ns:
4717                 result = find_deepest_zonecut(&search, node, nodep, foundname,
4718                                               rdataset, sigrdataset);
4719                 goto tree_exit;
4720         } else if (result != ISC_R_SUCCESS)
4721                 goto tree_exit;
4722
4723         /*
4724          * We now go looking for an NS rdataset at the node.
4725          */
4726
4727         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4728         locktype = isc_rwlocktype_read;
4729         NODE_LOCK(lock, locktype);
4730
4731         found = NULL;
4732         foundsig = NULL;
4733         header_prev = NULL;
4734         for (header = node->data; header != NULL; header = header_next) {
                     /*
                      * Save the successor first: 'header' may be unlinked and
                      * freed below.
                      */
4735                 header_next = header->next;
4736                 if (header->rdh_ttl <= now) {
4737                         /*
4738                          * This rdataset is stale.  If no one else is using the
4739                          * node, we can clean it up right now, otherwise we
4740                          * mark it as stale, and the node as dirty, so it will
4741                          * get cleaned up later.
4742                          */
4743                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4744                             (locktype == isc_rwlocktype_write ||
4745                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4746                                 /*
4747                                  * We update the node's status only when we
4748                                  * can get write access.
4749                                  */
4750                                 locktype = isc_rwlocktype_write;
4751
4752                                 if (dns_rbtnode_refcurrent(node) == 0) {
4753                                         isc_mem_t *mctx;
4754
4755                                         mctx = search.rbtdb->common.mctx;
4756                                         clean_stale_headers(search.rbtdb, mctx,
4757                                                             header);
4758                                         if (header_prev != NULL)
4759                                                 header_prev->next =
4760                                                         header->next;
4761                                         else
4762                                                 node->data = header->next;
4763                                         free_rdataset(search.rbtdb, mctx,
4764                                                       header);
4765                                 } else {
4766                                         header->attributes |=
4767                                                 RDATASET_ATTR_STALE;
4768                                         node->dirty = 1;
4769                                         header_prev = header;
4770                                 }
4771                         } else
4772                                 header_prev = header;
4773                 } else if (EXISTS(header)) {
4774                         /*
4775                          * If we found a type we were looking for, remember
4776                          * it.
4777                          */
4778                         if (header->type == dns_rdatatype_ns) {
4779                                 /*
4780                                  * This is the NS rdataset we are
4781                                  * looking for.
4782                                  *
4783                                  */
4784                                 found = header;
4785                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4786                                 /*
4787                                  * If we need the NS rdataset, we'll also
4788                                  * need its signature.
4789                                  */
4790                                 foundsig = header;
4791                         }
4792                         header_prev = header;
4793                 } else
4794                         header_prev = header;
4795         }
4796
4797         if (found == NULL) {
4798                 /*
4799                  * No NS records here.
4800                  */
4801                 NODE_UNLOCK(lock, locktype);
4802                 goto find_ns;
4803         }
4804
4805         if (nodep != NULL) {
4806                 new_reference(search.rbtdb, node);
4807                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4808                 *nodep = node;
4809         }
4810
4811         bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
4812         if (foundsig != NULL)
4813                 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4814                               sigrdataset);
4815
             /*
              * update_header() is only called with the node write lock held.
              * The lock is dropped and re-taken rather than upgraded, so
              * need_headerupdate() is evaluated again afterwards.
              */
4816         if (need_headerupdate(found, search.now) ||
4817             (foundsig != NULL &&  need_headerupdate(foundsig, search.now))) {
4818                 if (locktype != isc_rwlocktype_write) {
4819                         NODE_UNLOCK(lock, locktype);
4820                         NODE_LOCK(lock, isc_rwlocktype_write);
4821                         locktype = isc_rwlocktype_write;
4822                 }
4823                 if (need_headerupdate(found, search.now))
4824                         update_header(search.rbtdb, found, search.now);
4825                 if (foundsig != NULL &&
4826                     need_headerupdate(foundsig, search.now)) {
4827                         update_header(search.rbtdb, foundsig, search.now);
4828                 }
4829         }
4830
4831         NODE_UNLOCK(lock, locktype);
4832
4833  tree_exit:
4834         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4835
             /*
              * No zone cut callback was passed to dns_rbt_findnode() above,
              * so no zone cut reference is expected to need releasing.
              */
4836         INSIST(!search.need_cleanup);
4837
4838         dns_rbtnodechain_reset(&search.chain);
4839
             /* Callers of this function see a found zone cut as plain success. */
4840         if (result == DNS_R_DELEGATION)
4841                 result = ISC_R_SUCCESS;
4842
4843         return (result);
4844 }
4845
/*
 * attachnode: take an additional reference to the node 'source' and store
 * it in '*targetp'.
 *
 * 'targetp' must be non-NULL and '*targetp' must be NULL on entry.  The
 * reference count is incremented while holding the node's lock bucket via
 * NODE_STRONGLOCK; the count after the increment is asserted to be non-zero.
 */
4846 static void
4847 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
4848         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4849         dns_rbtnode_t *node = (dns_rbtnode_t *)source;
4850         unsigned int refs;
4851
4852         REQUIRE(VALID_RBTDB(rbtdb));
4853         REQUIRE(targetp != NULL && *targetp == NULL);
4854
4855         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
4856         dns_rbtnode_refincrement(node, &refs);
4857         INSIST(refs != 0);
4858         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
4859
4860         *targetp = source;
4861 }
4862
/*
 * detachnode: release the node reference held in '*targetp' and set
 * '*targetp' to NULL.
 *
 * 'targetp' and '*targetp' must both be non-NULL.  The reference is dropped
 * with decrement_reference() under the node's read lock.  If that call
 * returns true, the node's lock bucket has no references left and the
 * bucket is flagged as exiting, the database's 'active' counter is
 * decremented under the database write lock; when it reaches zero the
 * database is freed with free_rbtdb() after a debug message is logged.
 */
4863 static void
4864 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
4865         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4866         dns_rbtnode_t *node;
4867         isc_boolean_t want_free = ISC_FALSE;
4868         isc_boolean_t inactive = ISC_FALSE;
4869         rbtdb_nodelock_t *nodelock;
4870
4871         REQUIRE(VALID_RBTDB(rbtdb));
4872         REQUIRE(targetp != NULL && *targetp != NULL);
4873
4874         node = (dns_rbtnode_t *)(*targetp);
4875         nodelock = &rbtdb->node_locks[node->locknum];
4876
4877         NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
4878
             /*
              * NOTE(review): the meaning of decrement_reference()'s return
              * value is not visible here; it gates the "bucket is now
              * inactive" check below -- confirm against its definition.
              */
4879         if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
4880                                 isc_rwlocktype_none, ISC_FALSE)) {
4881                 if (isc_refcount_current(&nodelock->references) == 0 &&
4882                     nodelock->exiting) {
4883                         inactive = ISC_TRUE;
4884                 }
4885         }
4886
4887         NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
4888
4889         *targetp = NULL;
4890
4891         if (inactive) {
                     /*
                      * Decide under the database lock whether this was the
                      * last active part; the free itself happens after the
                      * lock has been released.
                      */
4892                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
4893                 rbtdb->active--;
4894                 if (rbtdb->active == 0)
4895                         want_free = ISC_TRUE;
4896                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
4897                 if (want_free) {
4898                         char buf[DNS_NAME_FORMATSIZE];
                             /*
                              * Log the origin name when dns_name_dynamic()
                              * reports true for it, else a placeholder.
                              * Assumes DNS_NAME_FORMATSIZE is large enough
                              * for "<UNKNOWN>" plus its terminator -- TODO
                              * confirm.
                              */
4899                         if (dns_name_dynamic(&rbtdb->common.origin))
4900                                 dns_name_format(&rbtdb->common.origin, buf,
4901                                                 sizeof(buf));
4902                         else
4903                                 strcpy(buf, "<UNKNOWN>");
4904                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
4905                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
4906                                       "calling free_rbtdb(%s)", buf);
4907                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
4908                 }
4909         }
4910 }
4911
4912 static isc_result_t
4913 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
4914         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4915         dns_rbtnode_t *rbtnode = node;
4916         rdatasetheader_t *header;
4917         isc_boolean_t force_expire = ISC_FALSE;
4918         /*
4919          * These are the category and module used by the cache cleaner.
4920          */
4921         isc_boolean_t log = ISC_FALSE;
4922         isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
4923         isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
4924         int level = ISC_LOG_DEBUG(2);
4925         char printname[DNS_NAME_FORMATSIZE];
4926
4927         REQUIRE(VALID_RBTDB(rbtdb));
4928
4929         /*
4930          * Caller must hold a tree lock.
4931          */
4932
4933         if (now == 0)
4934                 isc_stdtime_get(&now);
4935
4936         if (isc_mem_isovermem(rbtdb->common.mctx)) {
4937                 isc_uint32_t val;
4938
4939                 isc_random_get(&val);
4940                 /*
4941                  * XXXDCL Could stand to have a better policy, like LRU.
4942                  */
4943                 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
4944
4945                 /*
4946                  * Note that 'log' can be true IFF overmem is also true.
4947                  * overmem can currently only be true for cache
4948                  * databases -- hence all of the "overmem cache" log strings.
4949                  */
4950                 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
4951                 if (log)
4952                         isc_log_write(dns_lctx, category, module, level,
4953                                       "overmem cache: %s %s",
4954                                       force_expire ? "FORCE" : "check",
4955                                       dns_rbt_formatnodename(rbtnode,
4956                                                            printname,
4957                                                            sizeof(printname)));
4958         }
4959
4960         /*
4961          * We may not need write access, but this code path is not performance
4962          * sensitive, so it should be okay to always lock as a writer.
4963          */
4964         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4965                   isc_rwlocktype_write);
4966
4967         for (header = rbtnode->data; header != NULL; header = header->next)
4968                 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
4969                         /*
4970                          * We don't check if refcurrent(rbtnode) == 0 and try
4971                          * to free like we do in cache_find(), because
4972                          * refcurrent(rbtnode) must be non-zero.  This is so
4973                          * because 'node' is an argument to the function.
4974                          */
4975                         header->attributes |= RDATASET_ATTR_STALE;
4976                         rbtnode->dirty = 1;
4977                         if (log)
4978                                 isc_log_write(dns_lctx, category, module,
4979                                               level, "overmem cache: stale %s",
4980                                               printname);
4981                 } else if (force_expire) {
4982                         if (! RETAIN(header)) {
4983                                 set_ttl(rbtdb, header, 0);
4984                                 header->attributes |= RDATASET_ATTR_STALE;
4985                                 rbtnode->dirty = 1;
4986                         } else if (log) {
4987                                 isc_log_write(dns_lctx, category, module,
4988                                               level, "overmem cache: "
4989                                               "reprieve by RETAIN() %s",
4990                                               printname);
4991                         }
4992                 } else if (isc_mem_isovermem(rbtdb->common.mctx) && log)
4993                         isc_log_write(dns_lctx, category, module, level,
4994                                       "overmem cache: saved %s", printname);
4995
4996         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4997                     isc_rwlocktype_write);
4998
4999         return (ISC_R_SUCCESS);
5000 }
5001
5002 static void
5003 overmem(dns_db_t *db, isc_boolean_t overmem) {
5004         /* This is an empty callback.  See adb.c:water() */
5005
5006         UNUSED(db);
5007         UNUSED(overmem);
5008
5009         return;
5010 }
5011
5012 static void
5013 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5014         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5015         dns_rbtnode_t *rbtnode = node;
5016         isc_boolean_t first;
5017
5018         REQUIRE(VALID_RBTDB(rbtdb));
5019
5020         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5021                   isc_rwlocktype_read);
5022
5023         fprintf(out, "node %p, %u references, locknum = %u\n",
5024                 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5025                 rbtnode->locknum);
5026         if (rbtnode->data != NULL) {
5027                 rdatasetheader_t *current, *top_next;
5028
5029                 for (current = rbtnode->data; current != NULL;
5030                      current = top_next) {
5031                         top_next = current->next;
5032                         first = ISC_TRUE;
5033                         fprintf(out, "\ttype %u", current->type);
5034                         do {
5035                                 if (!first)
5036                                         fprintf(out, "\t");
5037                                 first = ISC_FALSE;
5038                                 fprintf(out,
5039                                         "\tserial = %lu, ttl = %u, "
5040                                         "trust = %u, attributes = %u, "
5041                                         "resign = %u\n",
5042                                         (unsigned long)current->serial,
5043                                         current->rdh_ttl,
5044                                         current->trust,
5045                                         current->attributes,
5046                                         current->resign);
5047                                 current = current->down;
5048                         } while (current != NULL);
5049                 }
5050         } else
5051                 fprintf(out, "(empty)\n");
5052
5053         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5054                     isc_rwlocktype_read);
5055 }
5056
5057 static isc_result_t
5058 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5059 {
5060         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5061         rbtdb_dbiterator_t *rbtdbiter;
5062
5063         REQUIRE(VALID_RBTDB(rbtdb));
5064
5065         rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5066         if (rbtdbiter == NULL)
5067                 return (ISC_R_NOMEMORY);
5068
5069         rbtdbiter->common.methods = &dbiterator_methods;
5070         rbtdbiter->common.db = NULL;
5071         dns_db_attach(db, &rbtdbiter->common.db);
5072         rbtdbiter->common.relative_names =
5073                         ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5074         rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5075         rbtdbiter->common.cleaning = ISC_FALSE;
5076         rbtdbiter->paused = ISC_TRUE;
5077         rbtdbiter->tree_locked = isc_rwlocktype_none;
5078         rbtdbiter->result = ISC_R_SUCCESS;
5079         dns_fixedname_init(&rbtdbiter->name);
5080         dns_fixedname_init(&rbtdbiter->origin);
5081         rbtdbiter->node = NULL;
5082         rbtdbiter->delete = 0;
5083         rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5084         rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5085         memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5086         dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5087         dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5088         if (rbtdbiter->nsec3only)
5089                 rbtdbiter->current = &rbtdbiter->nsec3chain;
5090         else
5091                 rbtdbiter->current = &rbtdbiter->chain;
5092
5093         *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5094
5095         return (ISC_R_SUCCESS);
5096 }
5097
5098 static isc_result_t
5099 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5100                   dns_rdatatype_t type, dns_rdatatype_t covers,
5101                   isc_stdtime_t now, dns_rdataset_t *rdataset,
5102                   dns_rdataset_t *sigrdataset)
5103 {
5104         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5105         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5106         rdatasetheader_t *header, *header_next, *found, *foundsig;
5107         rbtdb_serial_t serial;
5108         rbtdb_version_t *rbtversion = version;
5109         isc_boolean_t close_version = ISC_FALSE;
5110         rbtdb_rdatatype_t matchtype, sigmatchtype;
5111
5112         REQUIRE(VALID_RBTDB(rbtdb));
5113         REQUIRE(type != dns_rdatatype_any);
5114
5115         if (rbtversion == NULL) {
5116                 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5117                 close_version = ISC_TRUE;
5118         }
5119         serial = rbtversion->serial;
5120         now = 0;
5121
5122         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5123                   isc_rwlocktype_read);
5124
5125         found = NULL;
5126         foundsig = NULL;
5127         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5128         if (covers == 0)
5129                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5130         else
5131                 sigmatchtype = 0;
5132
5133         for (header = rbtnode->data; header != NULL; header = header_next) {
5134                 header_next = header->next;
5135                 do {
5136                         if (header->serial <= serial &&
5137                             !IGNORE(header)) {
5138                                 /*
5139                                  * Is this a "this rdataset doesn't
5140                                  * exist" record?
5141                                  */
5142                                 if (NONEXISTENT(header))
5143                                         header = NULL;
5144                                 break;
5145                         } else
5146                                 header = header->down;
5147                 } while (header != NULL);
5148                 if (header != NULL) {
5149                         /*
5150                          * We have an active, extant rdataset.  If it's a
5151                          * type we're looking for, remember it.
5152                          */
5153                         if (header->type == matchtype) {
5154                                 found = header;
5155                                 if (foundsig != NULL)
5156                                         break;
5157                         } else if (header->type == sigmatchtype) {
5158                                 foundsig = header;
5159                                 if (found != NULL)
5160                                         break;
5161                         }
5162                 }
5163         }
5164         if (found != NULL) {
5165                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5166                 if (foundsig != NULL)
5167                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5168                                       sigrdataset);
5169         }
5170
5171         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5172                     isc_rwlocktype_read);
5173
5174         if (close_version)
5175                 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5176                              ISC_FALSE);
5177
5178         if (found == NULL)
5179                 return (ISC_R_NOTFOUND);
5180
5181         return (ISC_R_SUCCESS);
5182 }
5183
5184 static isc_result_t
5185 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5186                    dns_rdatatype_t type, dns_rdatatype_t covers,
5187                    isc_stdtime_t now, dns_rdataset_t *rdataset,
5188                    dns_rdataset_t *sigrdataset)
5189 {
5190         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5191         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5192         rdatasetheader_t *header, *header_next, *found, *foundsig;
5193         rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5194         isc_result_t result;
5195         nodelock_t *lock;
5196         isc_rwlocktype_t locktype;
5197
5198         REQUIRE(VALID_RBTDB(rbtdb));
5199         REQUIRE(type != dns_rdatatype_any);
5200
5201         UNUSED(version);
5202
5203         result = ISC_R_SUCCESS;
5204
5205         if (now == 0)
5206                 isc_stdtime_get(&now);
5207
5208         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5209         locktype = isc_rwlocktype_read;
5210         NODE_LOCK(lock, locktype);
5211
5212         found = NULL;
5213         foundsig = NULL;
5214         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5215         negtype = RBTDB_RDATATYPE_VALUE(0, type);
5216         if (covers == 0)
5217                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5218         else
5219                 sigmatchtype = 0;
5220
5221         for (header = rbtnode->data; header != NULL; header = header_next) {
5222                 header_next = header->next;
5223                 if (header->rdh_ttl <= now) {
5224                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5225                             (locktype == isc_rwlocktype_write ||
5226                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5227                                 /*
5228                                  * We update the node's status only when we
5229                                  * can get write access.
5230                                  */
5231                                 locktype = isc_rwlocktype_write;
5232
5233                                 /*
5234                                  * We don't check if refcurrent(rbtnode) == 0
5235                                  * and try to free like we do in cache_find(),
5236                                  * because refcurrent(rbtnode) must be
5237                                  * non-zero.  This is so because 'node' is an
5238                                  * argument to the function.
5239                                  */
5240                                 header->attributes |= RDATASET_ATTR_STALE;
5241                                 rbtnode->dirty = 1;
5242                         }
5243                 } else if (EXISTS(header)) {
5244                         if (header->type == matchtype)
5245                                 found = header;
5246                         else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5247                                  header->type == negtype)
5248                                 found = header;
5249                         else if (header->type == sigmatchtype)
5250                                 foundsig = header;
5251                 }
5252         }
5253         if (found != NULL) {
5254                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5255                 if (!NEGATIVE(found) && foundsig != NULL)
5256                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5257                                       sigrdataset);
5258         }
5259
5260         NODE_UNLOCK(lock, locktype);
5261
5262         if (found == NULL)
5263                 return (ISC_R_NOTFOUND);
5264
5265         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
5266                 /*
5267                  * We found a negative cache entry.
5268                  */
5269                 if (NXDOMAIN(found))
5270                         result = DNS_R_NCACHENXDOMAIN;
5271                 else
5272                         result = DNS_R_NCACHENXRRSET;
5273         }
5274
5275         return (result);
5276 }
5277
5278 static isc_result_t
5279 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5280              isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5281 {
5282         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5283         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5284         rbtdb_version_t *rbtversion = version;
5285         rbtdb_rdatasetiter_t *iterator;
5286         unsigned int refs;
5287
5288         REQUIRE(VALID_RBTDB(rbtdb));
5289
5290         iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5291         if (iterator == NULL)
5292                 return (ISC_R_NOMEMORY);
5293
5294         if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5295                 now = 0;
5296                 if (rbtversion == NULL)
5297                         currentversion(db,
5298                                  (dns_dbversion_t **) (void *)(&rbtversion));
5299                 else {
5300                         unsigned int refs;
5301
5302                         isc_refcount_increment(&rbtversion->references,
5303                                                &refs);
5304                         INSIST(refs > 1);
5305                 }
5306         } else {
5307                 if (now == 0)
5308                         isc_stdtime_get(&now);
5309                 rbtversion = NULL;
5310         }
5311
5312         iterator->common.magic = DNS_RDATASETITER_MAGIC;
5313         iterator->common.methods = &rdatasetiter_methods;
5314         iterator->common.db = db;
5315         iterator->common.node = node;
5316         iterator->common.version = (dns_dbversion_t *)rbtversion;
5317         iterator->common.now = now;
5318
5319         NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5320
5321         dns_rbtnode_refincrement(rbtnode, &refs);
5322         INSIST(refs != 0);
5323
5324         iterator->current = NULL;
5325
5326         NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5327
5328         *iteratorp = (dns_rdatasetiter_t *)iterator;
5329
5330         return (ISC_R_SUCCESS);
5331 }
5332
5333 static isc_boolean_t
5334 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5335         rdatasetheader_t *header, *header_next;
5336         isc_boolean_t cname, other_data;
5337         dns_rdatatype_t rdtype;
5338
5339         /*
5340          * The caller must hold the node lock.
5341          */
5342
5343         /*
5344          * Look for CNAME and "other data" rdatasets active in our version.
5345          */
5346         cname = ISC_FALSE;
5347         other_data = ISC_FALSE;
5348         for (header = node->data; header != NULL; header = header_next) {
5349                 header_next = header->next;
5350                 if (header->type == dns_rdatatype_cname) {
5351                         /*
5352                          * Look for an active extant CNAME.
5353                          */
5354                         do {
5355                                 if (header->serial <= serial &&
5356                                     !IGNORE(header)) {
5357                                         /*
5358                                          * Is this a "this rdataset doesn't
5359                                          * exist" record?
5360                                          */
5361                                         if (NONEXISTENT(header))
5362                                                 header = NULL;
5363                                         break;
5364                                 } else
5365                                         header = header->down;
5366                         } while (header != NULL);
5367                         if (header != NULL)
5368                                 cname = ISC_TRUE;
5369                 } else {
5370                         /*
5371                          * Look for active extant "other data".
5372                          *
5373                          * "Other data" is any rdataset whose type is not
5374                          * KEY, NSEC, SIG or RRSIG.
5375                          */
5376                         rdtype = RBTDB_RDATATYPE_BASE(header->type);
5377                         if (rdtype != dns_rdatatype_key &&
5378                             rdtype != dns_rdatatype_sig &&
5379                             rdtype != dns_rdatatype_nsec &&
5380                             rdtype != dns_rdatatype_rrsig) {
5381                                 /*
5382                                  * Is it active and extant?
5383                                  */
5384                                 do {
5385                                         if (header->serial <= serial &&
5386                                             !IGNORE(header)) {
5387                                                 /*
5388                                                  * Is this a "this rdataset
5389                                                  * doesn't exist" record?
5390                                                  */
5391                                                 if (NONEXISTENT(header))
5392                                                         header = NULL;
5393                                                 break;
5394                                         } else
5395                                                 header = header->down;
5396                                 } while (header != NULL);
5397                                 if (header != NULL)
5398                                         other_data = ISC_TRUE;
5399                         }
5400                 }
5401         }
5402
5403         if (cname && other_data)
5404                 return (ISC_TRUE);
5405
5406         return (ISC_FALSE);
5407 }
5408
5409 static isc_result_t
5410 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5411         isc_result_t result;
5412
5413         INSIST(!IS_CACHE(rbtdb));
5414         INSIST(newheader->heap_index == 0);
5415         INSIST(!ISC_LINK_LINKED(newheader, link));
5416
5417         result = isc_heap_insert(rbtdb->heaps[idx], newheader);
5418         return (result);
5419 }
5420
5421 static isc_result_t
5422 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5423     rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5424     dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5425 {
5426         rbtdb_changed_t *changed = NULL;
5427         rdatasetheader_t *topheader, *topheader_prev, *header, *sigheader;
5428         unsigned char *merged;
5429         isc_result_t result;
5430         isc_boolean_t header_nx;
5431         isc_boolean_t newheader_nx;
5432         isc_boolean_t merge;
5433         dns_rdatatype_t rdtype, covers;
5434         rbtdb_rdatatype_t negtype, sigtype;
5435         dns_trust_t trust;
5436         int idx;
5437
5438         /*
5439          * Add an rdatasetheader_t to a node.
5440          */
5441
5442         /*
5443          * Caller must be holding the node lock.
5444          */
5445
5446         if ((options & DNS_DBADD_MERGE) != 0) {
5447                 REQUIRE(rbtversion != NULL);
5448                 merge = ISC_TRUE;
5449         } else
5450                 merge = ISC_FALSE;
5451
5452         if ((options & DNS_DBADD_FORCE) != 0)
5453                 trust = dns_trust_ultimate;
5454         else
5455                 trust = newheader->trust;
5456
5457         if (rbtversion != NULL && !loading) {
5458                 /*
5459                  * We always add a changed record, even if no changes end up
5460                  * being made to this node, because it's harmless and
5461                  * simplifies the code.
5462                  */
5463                 changed = add_changed(rbtdb, rbtversion, rbtnode);
5464                 if (changed == NULL) {
5465                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5466                         return (ISC_R_NOMEMORY);
5467                 }
5468         }
5469
5470         newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5471         topheader_prev = NULL;
5472         sigheader = NULL;
5473         negtype = 0;
5474         if (rbtversion == NULL && !newheader_nx) {
5475                 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5476                 if (rdtype == 0) {
5477                         /*
5478                          * We're adding a negative cache entry.
5479                          */
5480                         covers = RBTDB_RDATATYPE_EXT(newheader->type);
5481                         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
5482                                                         covers);
5483                         for (topheader = rbtnode->data;
5484                              topheader != NULL;
5485                              topheader = topheader->next) {
5486                                 /*
5487                                  * If we're adding an negative cache entry
5488                                  * which covers all types (NXDOMAIN,
5489                                  * NODATA(QTYPE=ANY)).
5490                                  *
5491                                  * We make all other data stale so that the
5492                                  * only rdataset that can be found at this
5493                                  * node is the negative cache entry.
5494                                  *
5495                                  * Otherwise look for any RRSIGs of the
5496                                  * given type so they can be marked stale
5497                                  * later.
5498                                  */
5499                                 if (covers == dns_rdatatype_any) {
5500                                         set_ttl(rbtdb, topheader, 0);
5501                                         topheader->attributes |=
5502                                                 RDATASET_ATTR_STALE;
5503                                         rbtnode->dirty = 1;
5504                                 } else if (topheader->type == sigtype)
5505                                         sigheader = topheader;
5506                         }
5507                         if (covers == dns_rdatatype_any)
5508                                 goto find_header;
5509                         negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5510                 } else {
5511                         /*
5512                          * We're adding something that isn't a
5513                          * negative cache entry.  Look for an extant
5514                          * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5515                          * cache entry.
5516                          */
5517                         for (topheader = rbtnode->data;
5518                              topheader != NULL;
5519                              topheader = topheader->next) {
5520                                 if (topheader->type ==
5521                                     RBTDB_RDATATYPE_NCACHEANY)
5522                                         break;
5523                         }
5524                         if (topheader != NULL && EXISTS(topheader) &&
5525                             topheader->rdh_ttl > now) {
5526                                 /*
5527                                  * Found one.
5528                                  */
5529                                 if (trust < topheader->trust) {
5530                                         /*
5531                                          * The NXDOMAIN/NODATA(QTYPE=ANY)
5532                                          * is more trusted.
5533                                          */
5534                                         free_rdataset(rbtdb,
5535                                                       rbtdb->common.mctx,
5536                                                       newheader);
5537                                         if (addedrdataset != NULL)
5538                                                 bind_rdataset(rbtdb, rbtnode,
5539                                                               topheader, now,
5540                                                               addedrdataset);
5541                                         return (DNS_R_UNCHANGED);
5542                                 }
5543                                 /*
5544                                  * The new rdataset is better.  Expire the
5545                                  * NXDOMAIN/NODATA(QTYPE=ANY).
5546                                  */
5547                                 set_ttl(rbtdb, topheader, 0);
5548                                 topheader->attributes |= RDATASET_ATTR_STALE;
5549                                 rbtnode->dirty = 1;
5550                                 topheader = NULL;
5551                                 goto find_header;
5552                         }
5553                         negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5554                 }
5555         }
5556
5557         for (topheader = rbtnode->data;
5558              topheader != NULL;
5559              topheader = topheader->next) {
5560                 if (topheader->type == newheader->type ||
5561                     topheader->type == negtype)
5562                         break;
5563                 topheader_prev = topheader;
5564         }
5565
5566  find_header:
5567         /*
5568          * If header isn't NULL, we've found the right type.  There may be
5569          * IGNORE rdatasets between the top of the chain and the first real
5570          * data.  We skip over them.
5571          */
5572         header = topheader;
5573         while (header != NULL && IGNORE(header))
5574                 header = header->down;
5575         if (header != NULL) {
5576                 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
5577
5578                 /*
5579                  * Deleting an already non-existent rdataset has no effect.
5580                  */
5581                 if (header_nx && newheader_nx) {
5582                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5583                         return (DNS_R_UNCHANGED);
5584                 }
5585
5586                 /*
5587                  * Trying to add an rdataset with lower trust to a cache DB
5588                  * has no effect, provided that the cache data isn't stale.
5589                  */
5590                 if (rbtversion == NULL && trust < header->trust &&
5591                     (header->rdh_ttl > now || header_nx)) {
5592                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5593                         if (addedrdataset != NULL)
5594                                 bind_rdataset(rbtdb, rbtnode, header, now,
5595                                               addedrdataset);
5596                         return (DNS_R_UNCHANGED);
5597                 }
5598
5599                 /*
5600                  * Don't merge if a nonexistent rdataset is involved.
5601                  */
5602                 if (merge && (header_nx || newheader_nx))
5603                         merge = ISC_FALSE;
5604
5605                 /*
5606                  * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
5607                  * that is the union of 'newheader' and 'header'.
5608                  */
5609                 if (merge) {
5610                         unsigned int flags = 0;
5611                         INSIST(rbtversion->serial >= header->serial);
5612                         merged = NULL;
5613                         result = ISC_R_SUCCESS;
5614
5615                         if ((options & DNS_DBADD_EXACT) != 0)
5616                                 flags |= DNS_RDATASLAB_EXACT;
5617                         if ((options & DNS_DBADD_EXACTTTL) != 0 &&
5618                              newheader->rdh_ttl != header->rdh_ttl)
5619                                         result = DNS_R_NOTEXACT;
5620                         else if (newheader->rdh_ttl != header->rdh_ttl)
5621                                 flags |= DNS_RDATASLAB_FORCE;
5622                         if (result == ISC_R_SUCCESS)
5623                                 result = dns_rdataslab_merge(
5624                                              (unsigned char *)header,
5625                                              (unsigned char *)newheader,
5626                                              (unsigned int)(sizeof(*newheader)),
5627                                              rbtdb->common.mctx,
5628                                              rbtdb->common.rdclass,
5629                                              (dns_rdatatype_t)header->type,
5630                                              flags, &merged);
5631                         if (result == ISC_R_SUCCESS) {
5632                                 /*
5633                                  * If 'header' has the same serial number as
5634                                  * we do, we could clean it up now if we knew
5635                                  * that our caller had no references to it.
5636                                  * We don't know this, however, so we leave it
5637                                  * alone.  It will get cleaned up when
5638                                  * clean_zone_node() runs.
5639                                  */
5640                                 free_rdataset(rbtdb, rbtdb->common.mctx,
5641                                               newheader);
5642                                 newheader = (rdatasetheader_t *)merged;
5643                                 if (loading && RESIGN(newheader) &&
5644                                     RESIGN(header) &&
5645                                     header->resign < newheader->resign)
5646                                         newheader->resign = header->resign;
5647                         } else {
5648                                 free_rdataset(rbtdb, rbtdb->common.mctx,
5649                                               newheader);
5650                                 return (result);
5651                         }
5652                 }
5653                 /*
5654                  * Don't replace existing NS, A and AAAA RRsets
5655                  * in the cache if they already exist.  This
5656                  * prevents named being locked to old servers.
5657                  * Don't lower trust of existing record if the
5658                  * update is forced.
5659                  */
5660                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5661                     header->type == dns_rdatatype_ns &&
5662                     !header_nx && !newheader_nx &&
5663                     header->trust >= newheader->trust &&
5664                     dns_rdataslab_equalx((unsigned char *)header,
5665                                          (unsigned char *)newheader,
5666                                          (unsigned int)(sizeof(*newheader)),
5667                                          rbtdb->common.rdclass,
5668                                          (dns_rdatatype_t)header->type)) {
5669                         /*
5670                          * Honour the new ttl if it is less than the
5671                          * older one.
5672                          */
5673                         if (header->rdh_ttl > newheader->rdh_ttl)
5674                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
5675                         if (header->noqname == NULL &&
5676                             newheader->noqname != NULL) {
5677                                 header->noqname = newheader->noqname;
5678                                 newheader->noqname = NULL;
5679                         }
5680                         if (header->closest == NULL &&
5681                             newheader->closest != NULL) {
5682                                 header->closest = newheader->closest;
5683                                 newheader->closest = NULL;
5684                         }
5685                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5686                         if (addedrdataset != NULL)
5687                                 bind_rdataset(rbtdb, rbtnode, header, now,
5688                                               addedrdataset);
5689                         return (ISC_R_SUCCESS);
5690                 }
5691                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5692                     (header->type == dns_rdatatype_a ||
5693                      header->type == dns_rdatatype_aaaa) &&
5694                     !header_nx && !newheader_nx &&
5695                     header->trust >= newheader->trust &&
5696                     dns_rdataslab_equal((unsigned char *)header,
5697                                         (unsigned char *)newheader,
5698                                         (unsigned int)(sizeof(*newheader)))) {
5699                         /*
5700                          * Honour the new ttl if it is less than the
5701                          * older one.
5702                          */
5703                         if (header->rdh_ttl > newheader->rdh_ttl)
5704                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
5705                         if (header->noqname == NULL &&
5706                             newheader->noqname != NULL) {
5707                                 header->noqname = newheader->noqname;
5708                                 newheader->noqname = NULL;
5709                         }
5710                         if (header->closest == NULL &&
5711                             newheader->closest != NULL) {
5712                                 header->closest = newheader->closest;
5713                                 newheader->closest = NULL;
5714                         }
5715                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5716                         if (addedrdataset != NULL)
5717                                 bind_rdataset(rbtdb, rbtnode, header, now,
5718                                               addedrdataset);
5719                         return (ISC_R_SUCCESS);
5720                 }
5721                 INSIST(rbtversion == NULL ||
5722                        rbtversion->serial >= topheader->serial);
5723                 if (topheader_prev != NULL)
5724                         topheader_prev->next = newheader;
5725                 else
5726                         rbtnode->data = newheader;
5727                 newheader->next = topheader->next;
5728                 if (loading) {
5729                         /*
5730                          * There are no other references to 'header' when
5731                          * loading, so we MAY clean up 'header' now.
5732                          * Since we don't generate changed records when
5733                          * loading, we MUST clean up 'header' now.
5734                          */
5735                         newheader->down = NULL;
5736                         free_rdataset(rbtdb, rbtdb->common.mctx, header);
5737                 } else {
5738                         newheader->down = topheader;
5739                         topheader->next = newheader;
5740                         rbtnode->dirty = 1;
5741                         if (changed != NULL)
5742                                 changed->dirty = ISC_TRUE;
5743                         if (rbtversion == NULL) {
5744                                 set_ttl(rbtdb, header, 0);
5745                                 header->attributes |= RDATASET_ATTR_STALE;
5746                                 if (sigheader != NULL) {
5747                                         set_ttl(rbtdb, sigheader, 0);
5748                                         sigheader->attributes |=
5749                                                  RDATASET_ATTR_STALE;
5750                                 }
5751                         }
5752                         idx = newheader->node->locknum;
5753                         if (IS_CACHE(rbtdb)) {
5754                                 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5755                                                  newheader, link);
5756                                 /*
5757                                  * XXXMLG We don't check the return value
5758                                  * here.  If it fails, we will not do TTL
5759                                  * based expiry on this node.  However, we
5760                                  * will do it on the LRU side, so memory
5761                                  * will not leak... for long.
5762                                  */
5763                                 isc_heap_insert(rbtdb->heaps[idx], newheader);
5764                         } else if (RESIGN(newheader))
5765                                 resign_insert(rbtdb, idx, newheader);
5766                 }
5767         } else {
5768                 /*
5769                  * No non-IGNORED rdatasets of the given type exist at
5770                  * this node.
5771                  */
5772
5773                 /*
5774                  * If we're trying to delete the type, don't bother.
5775                  */
5776                 if (newheader_nx) {
5777                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5778                         return (DNS_R_UNCHANGED);
5779                 }
5780
5781                 if (topheader != NULL) {
5782                         /*
5783                          * We have a list of rdatasets of the given type,
5784                          * but they're all marked IGNORE.  We simply insert
5785                          * the new rdataset at the head of the list.
5786                          *
5787                          * Ignored rdatasets cannot occur during loading, so
5788                          * we INSIST on it.
5789                          */
5790                         INSIST(!loading);
5791                         INSIST(rbtversion == NULL ||
5792                                rbtversion->serial >= topheader->serial);
5793                         if (topheader_prev != NULL)
5794                                 topheader_prev->next = newheader;
5795                         else
5796                                 rbtnode->data = newheader;
5797                         newheader->next = topheader->next;
5798                         newheader->down = topheader;
5799                         topheader->next = newheader;
5800                         rbtnode->dirty = 1;
5801                         if (changed != NULL)
5802                                 changed->dirty = ISC_TRUE;
5803                 } else {
5804                         /*
5805                          * No rdatasets of the given type exist at the node.
5806                          */
5807                         newheader->next = rbtnode->data;
5808                         newheader->down = NULL;
5809                         rbtnode->data = newheader;
5810                 }
5811                 idx = newheader->node->locknum;
5812                 if (IS_CACHE(rbtdb)) {
5813                         ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5814                                          newheader, link);
5815                         isc_heap_insert(rbtdb->heaps[idx], newheader);
5816                 } else if (RESIGN(newheader)) {
5817                         resign_insert(rbtdb, idx, newheader);
5818                 }
5819         }
5820
5821         /*
5822          * Check if the node now contains CNAME and other data.
5823          */
5824         if (rbtversion != NULL &&
5825             cname_and_other_data(rbtnode, rbtversion->serial))
5826                 return (DNS_R_CNAMEANDOTHER);
5827
5828         if (addedrdataset != NULL)
5829                 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
5830
5831         return (ISC_R_SUCCESS);
5832 }
5833
5834 static inline isc_boolean_t
5835 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
5836                 rbtdb_rdatatype_t type)
5837 {
5838         if (IS_CACHE(rbtdb)) {
5839                 if (type == dns_rdatatype_dname)
5840                         return (ISC_TRUE);
5841                 else
5842                         return (ISC_FALSE);
5843         } else if (type == dns_rdatatype_dname ||
5844                    (type == dns_rdatatype_ns &&
5845                     (node != rbtdb->origin_node || IS_STUB(rbtdb))))
5846                 return (ISC_TRUE);
5847         return (ISC_FALSE);
5848 }
5849
5850 static inline isc_result_t
5851 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5852            dns_rdataset_t *rdataset)
5853 {
5854         struct noqname *noqname;
5855         isc_mem_t *mctx = rbtdb->common.mctx;
5856         dns_name_t name;
5857         dns_rdataset_t neg, negsig;
5858         isc_result_t result;
5859         isc_region_t r;
5860
5861         dns_name_init(&name, NULL);
5862         dns_rdataset_init(&neg);
5863         dns_rdataset_init(&negsig);
5864
5865         result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
5866         RUNTIME_CHECK(result == ISC_R_SUCCESS);
5867
5868         noqname = isc_mem_get(mctx, sizeof(*noqname));
5869         if (noqname == NULL) {
5870                 result = ISC_R_NOMEMORY;
5871                 goto cleanup;
5872         }
5873         dns_name_init(&noqname->name, NULL);
5874         noqname->neg = NULL;
5875         noqname->negsig = NULL;
5876         noqname->type = neg.type;
5877         result = dns_name_dup(&name, mctx, &noqname->name);
5878         if (result != ISC_R_SUCCESS)
5879                 goto cleanup;
5880         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5881         if (result != ISC_R_SUCCESS)
5882                 goto cleanup;
5883         noqname->neg = r.base;
5884         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5885         if (result != ISC_R_SUCCESS)
5886                 goto cleanup;
5887         noqname->negsig = r.base;
5888         dns_rdataset_disassociate(&neg);
5889         dns_rdataset_disassociate(&negsig);
5890         newheader->noqname = noqname;
5891         return (ISC_R_SUCCESS);
5892
5893 cleanup:
5894         dns_rdataset_disassociate(&neg);
5895         dns_rdataset_disassociate(&negsig);
5896         free_noqname(mctx, &noqname);
5897         return(result);
5898 }
5899
5900 static inline isc_result_t
5901 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5902            dns_rdataset_t *rdataset)
5903 {
5904         struct noqname *closest;
5905         isc_mem_t *mctx = rbtdb->common.mctx;
5906         dns_name_t name;
5907         dns_rdataset_t neg, negsig;
5908         isc_result_t result;
5909         isc_region_t r;
5910
5911         dns_name_init(&name, NULL);
5912         dns_rdataset_init(&neg);
5913         dns_rdataset_init(&negsig);
5914
5915         result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
5916         RUNTIME_CHECK(result == ISC_R_SUCCESS);
5917
5918         closest = isc_mem_get(mctx, sizeof(*closest));
5919         if (closest == NULL) {
5920                 result = ISC_R_NOMEMORY;
5921                 goto cleanup;
5922         }
5923         dns_name_init(&closest->name, NULL);
5924         closest->neg = NULL;
5925         closest->negsig = NULL;
5926         closest->type = neg.type;
5927         result = dns_name_dup(&name, mctx, &closest->name);
5928         if (result != ISC_R_SUCCESS)
5929                 goto cleanup;
5930         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5931         if (result != ISC_R_SUCCESS)
5932                 goto cleanup;
5933         closest->neg = r.base;
5934         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5935         if (result != ISC_R_SUCCESS)
5936                 goto cleanup;
5937         closest->negsig = r.base;
5938         dns_rdataset_disassociate(&neg);
5939         dns_rdataset_disassociate(&negsig);
5940         newheader->closest = closest;
5941         return (ISC_R_SUCCESS);
5942
5943  cleanup:
5944         dns_rdataset_disassociate(&neg);
5945         dns_rdataset_disassociate(&negsig);
5946         free_noqname(mctx, &closest);
5947         return(result);
5948 }
5949
5950 static dns_dbmethods_t zone_methods;
5951
5952 static isc_result_t
5953 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5954             isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
5955             dns_rdataset_t *addedrdataset)
5956 {
5957         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5958         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5959         rbtdb_version_t *rbtversion = version;
5960         isc_region_t region;
5961         rdatasetheader_t *newheader;
5962         rdatasetheader_t *header;
5963         isc_result_t result;
5964         isc_boolean_t delegating;
5965         isc_boolean_t tree_locked = ISC_FALSE;
5966         isc_boolean_t cache_is_overmem = ISC_FALSE;
5967
5968         REQUIRE(VALID_RBTDB(rbtdb));
5969
5970         if (rbtdb->common.methods == &zone_methods)
5971                 REQUIRE(((rbtnode->nsec3 &&
5972                           (rdataset->type == dns_rdatatype_nsec3 ||
5973                            rdataset->covers == dns_rdatatype_nsec3)) ||
5974                          (!rbtnode->nsec3 &&
5975                            rdataset->type != dns_rdatatype_nsec3 &&
5976                            rdataset->covers != dns_rdatatype_nsec3)));
5977
5978         if (rbtversion == NULL) {
5979                 if (now == 0)
5980                         isc_stdtime_get(&now);
5981         } else
5982                 now = 0;
5983
5984         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
5985                                             &region,
5986                                             sizeof(rdatasetheader_t));
5987         if (result != ISC_R_SUCCESS)
5988                 return (result);
5989
5990         newheader = (rdatasetheader_t *)region.base;
5991         init_rdataset(rbtdb, newheader);
5992         set_ttl(rbtdb, newheader, rdataset->ttl + now);
5993         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
5994                                                 rdataset->covers);
5995         newheader->attributes = 0;
5996         newheader->noqname = NULL;
5997         newheader->closest = NULL;
5998         newheader->count = init_count++;
5999         newheader->trust = rdataset->trust;
6000         newheader->additional_auth = NULL;
6001         newheader->additional_glue = NULL;
6002         newheader->last_used = now;
6003         newheader->node = rbtnode;
6004         if (rbtversion != NULL) {
6005                 newheader->serial = rbtversion->serial;
6006                 now = 0;
6007
6008                 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6009                         newheader->attributes |= RDATASET_ATTR_RESIGN;
6010                         newheader->resign = rdataset->resign;
6011                 } else
6012                         newheader->resign = 0;
6013         } else {
6014                 newheader->serial = 1;
6015                 newheader->resign = 0;
6016                 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
6017                         newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6018                 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6019                         newheader->attributes |= RDATASET_ATTR_OPTOUT;
6020                 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6021                         result = addnoqname(rbtdb, newheader, rdataset);
6022                         if (result != ISC_R_SUCCESS) {
6023                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6024                                               newheader);
6025                                 return (result);
6026                         }
6027                 }
6028                 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6029                         result = addclosest(rbtdb, newheader, rdataset);
6030                         if (result != ISC_R_SUCCESS) {
6031                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6032                                               newheader);
6033                                 return (result);
6034                         }
6035                 }
6036         }
6037
6038         /*
6039          * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6040          * just DNAME for the cache), then we need to set the callback bit
6041          * on the node.
6042          */
6043         if (delegating_type(rbtdb, rbtnode, rdataset->type))
6044                 delegating = ISC_TRUE;
6045         else
6046                 delegating = ISC_FALSE;
6047
6048         /*
6049          * If we're adding a delegation type or the DB is a cache in an overmem
6050          * state, hold an exclusive lock on the tree.  In the latter case
6051          * the lock does not necessarily have to be acquired but it will help
6052          * purge stale entries more effectively.
6053          */
6054         if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx))
6055                 cache_is_overmem = ISC_TRUE;
6056         if (delegating || cache_is_overmem) {
6057                 tree_locked = ISC_TRUE;
6058                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6059         }
6060
6061         if (cache_is_overmem)
6062                 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6063
6064         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6065                   isc_rwlocktype_write);
6066
6067         if (rbtdb->rrsetstats != NULL) {
6068                 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6069                 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6070         }
6071
6072         if (IS_CACHE(rbtdb)) {
6073                 if (tree_locked)
6074                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6075
6076                 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6077                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6078                         expire_header(rbtdb, header, tree_locked);
6079
6080                 /*
6081                  * If we've been holding a write lock on the tree just for
6082                  * cleaning, we can release it now.  However, we still need the
6083                  * node lock.
6084                  */
6085                 if (tree_locked && !delegating) {
6086                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6087                         tree_locked = ISC_FALSE;
6088                 }
6089         }
6090
6091         result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
6092                      addedrdataset, now);
6093         if (result == ISC_R_SUCCESS && delegating)
6094                 rbtnode->find_callback = 1;
6095
6096         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6097                     isc_rwlocktype_write);
6098
6099         if (tree_locked)
6100                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6101
6102         /*
6103          * Update the zone's secure status.  If version is non-NULL
6104          * this is deferred until closeversion() is called.
6105          */
6106         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6107                 iszonesecure(db, version, rbtdb->origin_node);
6108
6109         return (result);
6110 }
6111
6112 static isc_result_t
6113 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6114                  dns_rdataset_t *rdataset, unsigned int options,
6115                  dns_rdataset_t *newrdataset)
6116 {
6117         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6118         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6119         rbtdb_version_t *rbtversion = version;
6120         rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6121         unsigned char *subresult;
6122         isc_region_t region;
6123         isc_result_t result;
6124         rbtdb_changed_t *changed;
6125
6126         REQUIRE(VALID_RBTDB(rbtdb));
6127
6128         if (rbtdb->common.methods == &zone_methods)
6129                 REQUIRE(((rbtnode->nsec3 &&
6130                           (rdataset->type == dns_rdatatype_nsec3 ||
6131                            rdataset->covers == dns_rdatatype_nsec3)) ||
6132                          (!rbtnode->nsec3 &&
6133                            rdataset->type != dns_rdatatype_nsec3 &&
6134                            rdataset->covers != dns_rdatatype_nsec3)));
6135
6136         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6137                                             &region,
6138                                             sizeof(rdatasetheader_t));
6139         if (result != ISC_R_SUCCESS)
6140                 return (result);
6141         newheader = (rdatasetheader_t *)region.base;
6142         init_rdataset(rbtdb, newheader);
6143         set_ttl(rbtdb, newheader, rdataset->ttl);
6144         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6145                                                 rdataset->covers);
6146         newheader->attributes = 0;
6147         newheader->serial = rbtversion->serial;
6148         newheader->trust = 0;
6149         newheader->noqname = NULL;
6150         newheader->closest = NULL;
6151         newheader->count = init_count++;
6152         newheader->additional_auth = NULL;
6153         newheader->additional_glue = NULL;
6154         newheader->last_used = 0;
6155         newheader->node = rbtnode;
6156         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6157                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6158                 newheader->resign = rdataset->resign;
6159         } else
6160                 newheader->resign = 0;
6161
6162         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6163                   isc_rwlocktype_write);
6164
6165         changed = add_changed(rbtdb, rbtversion, rbtnode);
6166         if (changed == NULL) {
6167                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6168                 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6169                             isc_rwlocktype_write);
6170                 return (ISC_R_NOMEMORY);
6171         }
6172
6173         topheader_prev = NULL;
6174         for (topheader = rbtnode->data;
6175              topheader != NULL;
6176              topheader = topheader->next) {
6177                 if (topheader->type == newheader->type)
6178                         break;
6179                 topheader_prev = topheader;
6180         }
6181         /*
6182          * If header isn't NULL, we've found the right type.  There may be
6183          * IGNORE rdatasets between the top of the chain and the first real
6184          * data.  We skip over them.
6185          */
6186         header = topheader;
6187         while (header != NULL && IGNORE(header))
6188                 header = header->down;
6189         if (header != NULL && EXISTS(header)) {
6190                 unsigned int flags = 0;
6191                 subresult = NULL;
6192                 result = ISC_R_SUCCESS;
6193                 if ((options & DNS_DBSUB_EXACT) != 0) {
6194                         flags |= DNS_RDATASLAB_EXACT;
6195                         if (newheader->rdh_ttl != header->rdh_ttl)
6196                                 result = DNS_R_NOTEXACT;
6197                 }
6198                 if (result == ISC_R_SUCCESS)
6199                         result = dns_rdataslab_subtract(
6200                                         (unsigned char *)header,
6201                                         (unsigned char *)newheader,
6202                                         (unsigned int)(sizeof(*newheader)),
6203                                         rbtdb->common.mctx,
6204                                         rbtdb->common.rdclass,
6205                                         (dns_rdatatype_t)header->type,
6206                                         flags, &subresult);
6207                 if (result == ISC_R_SUCCESS) {
6208                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6209                         newheader = (rdatasetheader_t *)subresult;
6210                         init_rdataset(rbtdb, newheader);
6211                         /*
6212                          * We have to set the serial since the rdataslab
6213                          * subtraction routine copies the reserved portion of
6214                          * header, not newheader.
6215                          */
6216                         newheader->serial = rbtversion->serial;
6217                         /*
6218                          * XXXJT: dns_rdataslab_subtract() copied the pointers
6219                          * to additional info.  We need to clear these fields
6220                          * to avoid having duplicated references.
6221                          */
6222                         newheader->additional_auth = NULL;
6223                         newheader->additional_glue = NULL;
6224                 } else if (result == DNS_R_NXRRSET) {
6225                         /*
6226                          * This subtraction would remove all of the rdata;
6227                          * add a nonexistent header instead.
6228                          */
6229                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6230                         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6231                         if (newheader == NULL) {
6232                                 result = ISC_R_NOMEMORY;
6233                                 goto unlock;
6234                         }
6235                         set_ttl(rbtdb, newheader, 0);
6236                         newheader->type = topheader->type;
6237                         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6238                         newheader->trust = 0;
6239                         newheader->serial = rbtversion->serial;
6240                         newheader->noqname = NULL;
6241                         newheader->closest = NULL;
6242                         newheader->count = 0;
6243                         newheader->additional_auth = NULL;
6244                         newheader->additional_glue = NULL;
6245                         newheader->node = rbtnode;
6246                         newheader->resign = 0;
6247                         newheader->last_used = 0;
6248                 } else {
6249                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6250                         goto unlock;
6251                 }
6252
6253                 /*
6254                  * If we're here, we want to link newheader in front of
6255                  * topheader.
6256                  */
6257                 INSIST(rbtversion->serial >= topheader->serial);
6258                 if (topheader_prev != NULL)
6259                         topheader_prev->next = newheader;
6260                 else
6261                         rbtnode->data = newheader;
6262                 newheader->next = topheader->next;
6263                 newheader->down = topheader;
6264                 topheader->next = newheader;
6265                 rbtnode->dirty = 1;
6266                 changed->dirty = ISC_TRUE;
6267         } else {
6268                 /*
6269                  * The rdataset doesn't exist, so we don't need to do anything
6270                  * to satisfy the deletion request.
6271                  */
6272                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6273                 if ((options & DNS_DBSUB_EXACT) != 0)
6274                         result = DNS_R_NOTEXACT;
6275                 else
6276                         result = DNS_R_UNCHANGED;
6277         }
6278
6279         if (result == ISC_R_SUCCESS && newrdataset != NULL)
6280                 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6281
6282  unlock:
6283         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6284                     isc_rwlocktype_write);
6285
6286         /*
6287          * Update the zone's secure status.  If version is non-NULL
6288          * this is deferred until closeversion() is called.
6289          */
6290         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6291                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6292
6293         return (result);
6294 }
6295
6296 static isc_result_t
6297 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6298                dns_rdatatype_t type, dns_rdatatype_t covers)
6299 {
6300         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6301         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6302         rbtdb_version_t *rbtversion = version;
6303         isc_result_t result;
6304         rdatasetheader_t *newheader;
6305
6306         REQUIRE(VALID_RBTDB(rbtdb));
6307
6308         if (type == dns_rdatatype_any)
6309                 return (ISC_R_NOTIMPLEMENTED);
6310         if (type == dns_rdatatype_rrsig && covers == 0)
6311                 return (ISC_R_NOTIMPLEMENTED);
6312
6313         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6314         if (newheader == NULL)
6315                 return (ISC_R_NOMEMORY);
6316         set_ttl(rbtdb, newheader, 0);
6317         newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6318         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6319         newheader->trust = 0;
6320         newheader->noqname = NULL;
6321         newheader->closest = NULL;
6322         newheader->additional_auth = NULL;
6323         newheader->additional_glue = NULL;
6324         if (rbtversion != NULL)
6325                 newheader->serial = rbtversion->serial;
6326         else
6327                 newheader->serial = 0;
6328         newheader->count = 0;
6329         newheader->last_used = 0;
6330         newheader->node = rbtnode;
6331
6332         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6333                   isc_rwlocktype_write);
6334
6335         result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6336                      ISC_FALSE, NULL, 0);
6337
6338         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6339                     isc_rwlocktype_write);
6340
6341         /*
6342          * Update the zone's secure status.  If version is non-NULL
6343          * this is deferred until closeversion() is called.
6344          */
6345         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6346                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6347
6348         return (result);
6349 }
6350
6351 static isc_result_t
6352 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6353         rbtdb_load_t *loadctx = arg;
6354         dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6355         dns_rbtnode_t *node;
6356         isc_result_t result;
6357         isc_region_t region;
6358         rdatasetheader_t *newheader;
6359
6360         /*
6361          * This routine does no node locking.  See comments in
6362          * 'load' below for more information on loading and
6363          * locking.
6364          */
6365
6366
6367         /*
6368          * SOA records are only allowed at top of zone.
6369          */
6370         if (rdataset->type == dns_rdatatype_soa &&
6371             !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6372                 return (DNS_R_NOTZONETOP);
6373
6374         if (rdataset->type != dns_rdatatype_nsec3 &&
6375             rdataset->covers != dns_rdatatype_nsec3)
6376                 add_empty_wildcards(rbtdb, name);
6377
6378         if (dns_name_iswildcard(name)) {
6379                 /*
6380                  * NS record owners cannot legally be wild cards.
6381                  */
6382                 if (rdataset->type == dns_rdatatype_ns)
6383                         return (DNS_R_INVALIDNS);
6384                 /*
6385                  * NSEC3 record owners cannot legally be wild cards.
6386                  */
6387                 if (rdataset->type == dns_rdatatype_nsec3)
6388                         return (DNS_R_INVALIDNSEC3);
6389                 result = add_wildcard_magic(rbtdb, name);
6390                 if (result != ISC_R_SUCCESS)
6391                         return (result);
6392         }
6393
6394         node = NULL;
6395         if (rdataset->type == dns_rdatatype_nsec3 ||
6396             rdataset->covers == dns_rdatatype_nsec3) {
6397                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6398                 if (result == ISC_R_SUCCESS)
6399                         node->nsec3 = 1;
6400         } else {
6401                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
6402                 if (result == ISC_R_SUCCESS)
6403                         node->nsec3 = 0;
6404         }
6405         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
6406                 return (result);
6407         if (result != ISC_R_EXISTS) {
6408                 dns_name_t foundname;
6409                 dns_name_init(&foundname, NULL);
6410                 dns_rbt_namefromnode(node, &foundname);
6411 #ifdef DNS_RBT_USEHASH
6412                 node->locknum = node->hashval % rbtdb->node_lock_count;
6413 #else
6414                 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6415                         rbtdb->node_lock_count;
6416 #endif
6417         }
6418
6419         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6420                                             &region,
6421                                             sizeof(rdatasetheader_t));
6422         if (result != ISC_R_SUCCESS)
6423                 return (result);
6424         newheader = (rdatasetheader_t *)region.base;
6425         init_rdataset(rbtdb, newheader);
6426         set_ttl(rbtdb, newheader,
6427                 rdataset->ttl + loadctx->now); /* XXX overflow check */
6428         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6429                                                 rdataset->covers);
6430         newheader->attributes = 0;
6431         newheader->trust = rdataset->trust;
6432         newheader->serial = 1;
6433         newheader->noqname = NULL;
6434         newheader->closest = NULL;
6435         newheader->count = init_count++;
6436         newheader->additional_auth = NULL;
6437         newheader->additional_glue = NULL;
6438         newheader->last_used = 0;
6439         newheader->node = node;
6440         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6441                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6442                 newheader->resign = rdataset->resign;
6443         } else
6444                 newheader->resign = 0;
6445
6446         result = add(rbtdb, node, rbtdb->current_version, newheader,
6447                      DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
6448         if (result == ISC_R_SUCCESS &&
6449             delegating_type(rbtdb, node, rdataset->type))
6450                 node->find_callback = 1;
6451         else if (result == DNS_R_UNCHANGED)
6452                 result = ISC_R_SUCCESS;
6453
6454         return (result);
6455 }
6456
6457 static isc_result_t
6458 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6459         rbtdb_load_t *loadctx;
6460         dns_rbtdb_t *rbtdb;
6461
6462         rbtdb = (dns_rbtdb_t *)db;
6463
6464         REQUIRE(VALID_RBTDB(rbtdb));
6465
6466         loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
6467         if (loadctx == NULL)
6468                 return (ISC_R_NOMEMORY);
6469
6470         loadctx->rbtdb = rbtdb;
6471         if (IS_CACHE(rbtdb))
6472                 isc_stdtime_get(&loadctx->now);
6473         else
6474                 loadctx->now = 0;
6475
6476         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6477
6478         REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
6479                 == 0);
6480         rbtdb->attributes |= RBTDB_ATTR_LOADING;
6481
6482         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6483
6484         *addp = loading_addrdataset;
6485         *dbloadp = loadctx;
6486
6487         return (ISC_R_SUCCESS);
6488 }
6489
6490 static isc_result_t
6491 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
6492         rbtdb_load_t *loadctx;
6493         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6494
6495         REQUIRE(VALID_RBTDB(rbtdb));
6496         REQUIRE(dbloadp != NULL);
6497         loadctx = *dbloadp;
6498         REQUIRE(loadctx->rbtdb == rbtdb);
6499
6500         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6501
6502         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
6503         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
6504
6505         rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
6506         rbtdb->attributes |= RBTDB_ATTR_LOADED;
6507
6508         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6509
6510         /*
6511          * If there's a KEY rdataset at the zone origin containing a
6512          * zone key, we consider the zone secure.
6513          */
6514         if (! IS_CACHE(rbtdb))
6515                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6516
6517         *dbloadp = NULL;
6518
6519         isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
6520
6521         return (ISC_R_SUCCESS);
6522 }
6523
6524 static isc_result_t
6525 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
6526      dns_masterformat_t masterformat) {
6527         dns_rbtdb_t *rbtdb;
6528
6529         rbtdb = (dns_rbtdb_t *)db;
6530
6531         REQUIRE(VALID_RBTDB(rbtdb));
6532
6533         return (dns_master_dump2(rbtdb->common.mctx, db, version,
6534                                  &dns_master_style_default,
6535                                  filename, masterformat));
6536 }
6537
6538 static void
6539 delete_callback(void *data, void *arg) {
6540         dns_rbtdb_t *rbtdb = arg;
6541         rdatasetheader_t *current, *next;
6542         unsigned int locknum;
6543
6544         current = data;
6545         locknum = current->node->locknum;
6546         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6547         while (current != NULL) {
6548                 next = current->next;
6549                 free_rdataset(rbtdb, rbtdb->common.mctx, current);
6550                 current = next;
6551         }
6552         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6553 }
6554
6555 static isc_boolean_t
6556 issecure(dns_db_t *db) {
6557         dns_rbtdb_t *rbtdb;
6558         isc_boolean_t secure;
6559
6560         rbtdb = (dns_rbtdb_t *)db;
6561
6562         REQUIRE(VALID_RBTDB(rbtdb));
6563
6564         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6565         secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
6566         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6567
6568         return (secure);
6569 }
6570
6571 static isc_boolean_t
6572 isdnssec(dns_db_t *db) {
6573         dns_rbtdb_t *rbtdb;
6574         isc_boolean_t dnssec;
6575
6576         rbtdb = (dns_rbtdb_t *)db;
6577
6578         REQUIRE(VALID_RBTDB(rbtdb));
6579
6580         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6581         dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
6582         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6583
6584         return (dnssec);
6585 }
6586
6587 static unsigned int
6588 nodecount(dns_db_t *db) {
6589         dns_rbtdb_t *rbtdb;
6590         unsigned int count;
6591
6592         rbtdb = (dns_rbtdb_t *)db;
6593
6594         REQUIRE(VALID_RBTDB(rbtdb));
6595
6596         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6597         count = dns_rbt_nodecount(rbtdb->tree);
6598         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6599
6600         return (count);
6601 }
6602
6603 static void
6604 settask(dns_db_t *db, isc_task_t *task) {
6605         dns_rbtdb_t *rbtdb;
6606
6607         rbtdb = (dns_rbtdb_t *)db;
6608
6609         REQUIRE(VALID_RBTDB(rbtdb));
6610
6611         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6612         if (rbtdb->task != NULL)
6613                 isc_task_detach(&rbtdb->task);
6614         if (task != NULL)
6615                 isc_task_attach(task, &rbtdb->task);
6616         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6617 }
6618
6619 static isc_boolean_t
6620 ispersistent(dns_db_t *db) {
6621         UNUSED(db);
6622         return (ISC_FALSE);
6623 }
6624
6625 static isc_result_t
6626 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
6627         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6628         dns_rbtnode_t *onode;
6629         isc_result_t result = ISC_R_SUCCESS;
6630
6631         REQUIRE(VALID_RBTDB(rbtdb));
6632         REQUIRE(nodep != NULL && *nodep == NULL);
6633
6634         /* Note that the access to origin_node doesn't require a DB lock */
6635         onode = (dns_rbtnode_t *)rbtdb->origin_node;
6636         if (onode != NULL) {
6637                 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
6638                 new_reference(rbtdb, onode);
6639                 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
6640
6641                 *nodep = rbtdb->origin_node;
6642         } else {
6643                 INSIST(IS_CACHE(rbtdb));
6644                 result = ISC_R_NOTFOUND;
6645         }
6646
6647         return (result);
6648 }
6649
6650 static isc_result_t
6651 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
6652                    isc_uint8_t *flags, isc_uint16_t *iterations,
6653                    unsigned char *salt, size_t *salt_length)
6654 {
6655         dns_rbtdb_t *rbtdb;
6656         isc_result_t result = ISC_R_NOTFOUND;
6657         rbtdb_version_t *rbtversion = version;
6658
6659         rbtdb = (dns_rbtdb_t *)db;
6660
6661         REQUIRE(VALID_RBTDB(rbtdb));
6662
6663         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6664
6665         if (rbtversion == NULL)
6666                 rbtversion = rbtdb->current_version;
6667
6668         if (rbtversion->havensec3) {
6669                 if (hash != NULL)
6670                         *hash = rbtversion->hash;
6671                 if (salt != NULL && salt_length != NULL) {
6672                         REQUIRE(*salt_length >= rbtversion->salt_length);
6673                         memcpy(salt, rbtversion->salt, rbtversion->salt_length);
6674                 }
6675                 if (salt_length != NULL)
6676                         *salt_length = rbtversion->salt_length;
6677                 if (iterations != NULL)
6678                         *iterations = rbtversion->iterations;
6679                 if (flags != NULL)
6680                         *flags = rbtversion->flags;
6681                 result = ISC_R_SUCCESS;
6682         }
6683         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6684
6685         return (result);
6686 }
6687
6688 static isc_result_t
6689 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
6690         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6691         isc_stdtime_t oldresign;
6692         isc_result_t result = ISC_R_SUCCESS;
6693         rdatasetheader_t *header;
6694
6695         REQUIRE(VALID_RBTDB(rbtdb));
6696         REQUIRE(!IS_CACHE(rbtdb));
6697         REQUIRE(rdataset != NULL);
6698
6699         header = rdataset->private3;
6700         header--;
6701
6702         NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6703                   isc_rwlocktype_write);
6704
6705         oldresign = header->resign;
6706         header->resign = resign;
6707         if (header->heap_index != 0) {
6708                 INSIST(RESIGN(header));
6709                 if (resign == 0) {
6710                         isc_heap_delete(rbtdb->heaps[header->node->locknum],
6711                                         header->heap_index);
6712                         header->heap_index = 0;
6713                 } else if (resign < oldresign)
6714                         isc_heap_increased(rbtdb->heaps[header->node->locknum],
6715                                            header->heap_index);
6716                 else
6717                         isc_heap_decreased(rbtdb->heaps[header->node->locknum],
6718                                            header->heap_index);
6719         } else if (resign && header->heap_index == 0) {
6720                 header->attributes |= RDATASET_ATTR_RESIGN;
6721                 result = resign_insert(rbtdb, header->node->locknum, header);
6722         }
6723         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6724                     isc_rwlocktype_write);
6725         return (result);
6726 }
6727
6728 static isc_result_t
6729 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
6730                dns_name_t *foundname)
6731 {
6732         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6733         rdatasetheader_t *header = NULL, *this;
6734         unsigned int i;
6735         isc_result_t result = ISC_R_NOTFOUND;
6736         unsigned int locknum;
6737
6738         REQUIRE(VALID_RBTDB(rbtdb));
6739
6740         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
6741
6742         for (i = 0; i < rbtdb->node_lock_count; i++) {
6743                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
6744                 this = isc_heap_element(rbtdb->heaps[i], 1);
6745                 if (this == NULL) {
6746                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6747                                     isc_rwlocktype_read);
6748                         continue;
6749                 }
6750                 if (header == NULL)
6751                         header = this;
6752                 else if (isc_serial_lt(this->resign, header->resign)) {
6753                         locknum = header->node->locknum;
6754                         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
6755                                     isc_rwlocktype_read);
6756                         header = this;
6757                 } else
6758                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6759                                     isc_rwlocktype_read);
6760         }
6761
6762         if (header == NULL)
6763                 goto unlock;
6764
6765         bind_rdataset(rbtdb, header->node, header, 0, rdataset);
6766
6767         if (foundname != NULL)
6768                 dns_rbt_fullnamefromnode(header->node, foundname);
6769
6770         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6771                     isc_rwlocktype_read);
6772
6773         result = ISC_R_SUCCESS;
6774
6775  unlock:
6776         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
6777
6778         return (result);
6779 }
6780
6781 static void
6782 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
6783 {
6784         rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
6785         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6786         dns_rbtnode_t *node;
6787         rdatasetheader_t *header;
6788
6789         REQUIRE(VALID_RBTDB(rbtdb));
6790         REQUIRE(rdataset != NULL);
6791         REQUIRE(rbtdb->future_version == rbtversion);
6792         REQUIRE(rbtversion->writer);
6793
6794         node = rdataset->private2;
6795         header = rdataset->private3;
6796         header--;
6797
6798         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6799         NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
6800                   isc_rwlocktype_write);
6801         /*
6802          * Delete from heap and save to re-signed list so that it can
6803          * be restored if we backout of this change.
6804          */
6805         new_reference(rbtdb, node);
6806         isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
6807         header->heap_index = 0;
6808         ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
6809
6810         NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
6811                     isc_rwlocktype_write);
6812         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6813 }
6814
6815 static dns_stats_t *
6816 getrrsetstats(dns_db_t *db) {
6817         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6818
6819         REQUIRE(VALID_RBTDB(rbtdb));
6820         REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
6821
6822         return (rbtdb->rrsetstats);
6823 }
6824
6825 static dns_dbmethods_t zone_methods = {
6826         attach,
6827         detach,
6828         beginload,
6829         endload,
6830         dump,
6831         currentversion,
6832         newversion,
6833         attachversion,
6834         closeversion,
6835         findnode,
6836         zone_find,
6837         zone_findzonecut,
6838         attachnode,
6839         detachnode,
6840         expirenode,
6841         printnode,
6842         createiterator,
6843         zone_findrdataset,
6844         allrdatasets,
6845         addrdataset,
6846         subtractrdataset,
6847         deleterdataset,
6848         issecure,
6849         nodecount,
6850         ispersistent,
6851         overmem,
6852         settask,
6853         getoriginnode,
6854         NULL,
6855         getnsec3parameters,
6856         findnsec3node,
6857         setsigningtime,
6858         getsigningtime,
6859         resigned,
6860         isdnssec,
6861         NULL
6862 };
6863
6864 static dns_dbmethods_t cache_methods = {
6865         attach,
6866         detach,
6867         beginload,
6868         endload,
6869         dump,
6870         currentversion,
6871         newversion,
6872         attachversion,
6873         closeversion,
6874         findnode,
6875         cache_find,
6876         cache_findzonecut,
6877         attachnode,
6878         detachnode,
6879         expirenode,
6880         printnode,
6881         createiterator,
6882         cache_findrdataset,
6883         allrdatasets,
6884         addrdataset,
6885         subtractrdataset,
6886         deleterdataset,
6887         issecure,
6888         nodecount,
6889         ispersistent,
6890         overmem,
6891         settask,
6892         getoriginnode,
6893         NULL,
6894         NULL,
6895         NULL,
6896         NULL,
6897         NULL,
6898         NULL,
6899         isdnssec,
6900         getrrsetstats
6901 };
6902
6903 isc_result_t
6904 #ifdef DNS_RBTDB_VERSION64
6905 dns_rbtdb64_create
6906 #else
6907 dns_rbtdb_create
6908 #endif
6909                 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
6910                  dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
6911                  void *driverarg, dns_db_t **dbp)
6912 {
6913         dns_rbtdb_t *rbtdb;
6914         isc_result_t result;
6915         int i;
6916         dns_name_t name;
6917         isc_boolean_t (*sooner)(void *, void *);
6918
6919         /* Keep the compiler happy. */
6920         UNUSED(argc);
6921         UNUSED(argv);
6922         UNUSED(driverarg);
6923
6924         rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
6925         if (rbtdb == NULL)
6926                 return (ISC_R_NOMEMORY);
6927
6928         memset(rbtdb, '\0', sizeof(*rbtdb));
6929         dns_name_init(&rbtdb->common.origin, NULL);
6930         rbtdb->common.attributes = 0;
6931         if (type == dns_dbtype_cache) {
6932                 rbtdb->common.methods = &cache_methods;
6933                 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
6934         } else if (type == dns_dbtype_stub) {
6935                 rbtdb->common.methods = &zone_methods;
6936                 rbtdb->common.attributes |= DNS_DBATTR_STUB;
6937         } else
6938                 rbtdb->common.methods = &zone_methods;
6939         rbtdb->common.rdclass = rdclass;
6940         rbtdb->common.mctx = NULL;
6941
6942         result = RBTDB_INITLOCK(&rbtdb->lock);
6943         if (result != ISC_R_SUCCESS)
6944                 goto cleanup_rbtdb;
6945
6946         result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
6947         if (result != ISC_R_SUCCESS)
6948                 goto cleanup_lock;
6949
6950         /*
6951          * Initialize node_lock_count in a generic way to support future
6952          * extension which allows the user to specify this value on creation.
6953          * Note that when specified for a cache DB it must be larger than 1
6954          * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
6955          */
6956         if (rbtdb->node_lock_count == 0) {
6957                 if (IS_CACHE(rbtdb))
6958                         rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
6959                 else
6960                         rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
6961         } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
6962                 result = ISC_R_RANGE;
6963                 goto cleanup_tree_lock;
6964         }
6965         INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
6966         rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
6967                                         sizeof(rbtdb_nodelock_t));
6968         if (rbtdb->node_locks == NULL) {
6969                 result = ISC_R_NOMEMORY;
6970                 goto cleanup_tree_lock;
6971         }
6972
6973         rbtdb->rrsetstats = NULL;
6974         if (IS_CACHE(rbtdb)) {
6975                 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
6976                 if (result != ISC_R_SUCCESS)
6977                         goto cleanup_node_locks;
6978                 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
6979                                                sizeof(rdatasetheaderlist_t));
6980                 if (rbtdb->rdatasets == NULL) {
6981                         result = ISC_R_NOMEMORY;
6982                         goto cleanup_rrsetstats;
6983                 }
6984                 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6985                         ISC_LIST_INIT(rbtdb->rdatasets[i]);
6986         } else
6987                 rbtdb->rdatasets = NULL;
6988
6989         /*
6990          * Create the heaps.
6991          */
6992         rbtdb->heaps = isc_mem_get(mctx, rbtdb->node_lock_count *
6993                                    sizeof(isc_heap_t *));
6994         if (rbtdb->heaps == NULL) {
6995                 result = ISC_R_NOMEMORY;
6996                 goto cleanup_rdatasets;
6997         }
6998         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6999                 rbtdb->heaps[i] = NULL;
7000         sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
7001         for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
7002                 result = isc_heap_create(mctx, sooner, set_index, 0,
7003                                          &rbtdb->heaps[i]);
7004                 if (result != ISC_R_SUCCESS)
7005                         goto cleanup_heaps;
7006         }
7007
7008         /*
7009          * Create deadnode lists.
7010          */
7011         rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
7012                                        sizeof(rbtnodelist_t));
7013         if (rbtdb->deadnodes == NULL) {
7014                 result = ISC_R_NOMEMORY;
7015                 goto cleanup_heaps;
7016         }
7017         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7018                 ISC_LIST_INIT(rbtdb->deadnodes[i]);
7019
7020         rbtdb->active = rbtdb->node_lock_count;
7021
7022         for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
7023                 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
7024                 if (result == ISC_R_SUCCESS) {
7025                         result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
7026                         if (result != ISC_R_SUCCESS)
7027                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7028                 }
7029                 if (result != ISC_R_SUCCESS) {
7030                         while (i-- > 0) {
7031                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7032                                 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7033                                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7034                         }
7035                         goto cleanup_deadnodes;
7036                 }
7037                 rbtdb->node_locks[i].exiting = ISC_FALSE;
7038         }
7039
7040         /*
7041          * Attach to the mctx.  The database will persist so long as there
7042          * are references to it, and attaching to the mctx ensures that our
7043          * mctx won't disappear out from under us.
7044          */
7045         isc_mem_attach(mctx, &rbtdb->common.mctx);
7046
7047         /*
7048          * Must be initialized before free_rbtdb() is called.
7049          */
7050         isc_ondestroy_init(&rbtdb->common.ondest);
7051
7052         /*
7053          * Make a copy of the origin name.
7054          */
7055         result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7056         if (result != ISC_R_SUCCESS) {
7057                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7058                 return (result);
7059         }
7060
7061         /*
7062          * Make the Red-Black Trees.
7063          */
7064         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7065         if (result != ISC_R_SUCCESS) {
7066                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7067                 return (result);
7068         }
7069
7070         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7071         if (result != ISC_R_SUCCESS) {
7072                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7073                 return (result);
7074         }
7075
7076         /*
7077          * In order to set the node callback bit correctly in zone databases,
7078          * we need to know if the node has the origin name of the zone.
7079          * In loading_addrdataset() we could simply compare the new name
7080          * to the origin name, but this is expensive.  Also, we don't know the
7081          * node name in addrdataset(), so we need another way of knowing the
7082          * zone's top.
7083          *
7084          * We now explicitly create a node for the zone's origin, and then
7085          * we simply remember the node's address.  This is safe, because
7086          * the top-of-zone node can never be deleted, nor can its address
7087          * change.
7088          */
7089         if (!IS_CACHE(rbtdb)) {
7090                 dns_rbtnode_t *nsec3node;
7091
7092                 rbtdb->origin_node = NULL;
7093                 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7094                                          &rbtdb->origin_node);
7095                 if (result != ISC_R_SUCCESS) {
7096                         INSIST(result != ISC_R_EXISTS);
7097                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7098                         return (result);
7099                 }
7100                 rbtdb->origin_node->nsec3 = 0;
7101                 /*
7102                  * We need to give the origin node the right locknum.
7103                  */
7104                 dns_name_init(&name, NULL);
7105                 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7106 #ifdef DNS_RBT_USEHASH
7107                 rbtdb->origin_node->locknum =
7108                         rbtdb->origin_node->hashval %
7109                         rbtdb->node_lock_count;
7110 #else
7111                 rbtdb->origin_node->locknum =
7112                         dns_name_hash(&name, ISC_TRUE) %
7113                         rbtdb->node_lock_count;
7114 #endif
7115                 /*
7116                  * Add an apex node to the NSEC3 tree so that NSEC3 searches
7117                  * return partial matches when there is only a single NSEC3
7118                  * record in the tree.
7119                  */
7120                 nsec3node = NULL;
7121                 result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin,
7122                                          &nsec3node);
7123                 if (result != ISC_R_SUCCESS) {
7124                         INSIST(result != ISC_R_EXISTS);
7125                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7126                         return (result);
7127                 }
7128                 nsec3node->nsec3 = 1;
7129                 /*
7130                  * We need to give the nsec3 origin node the right locknum.
7131                  */
7132                 dns_name_init(&name, NULL);
7133                 dns_rbt_namefromnode(nsec3node, &name);
7134 #ifdef DNS_RBT_USEHASH
7135                 nsec3node->locknum = nsec3node->hashval %
7136                         rbtdb->node_lock_count;
7137 #else
7138                 nsec3node->locknum = dns_name_hash(&name, ISC_TRUE) %
7139                         rbtdb->node_lock_count;
7140 #endif
7141         }
7142
7143         /*
7144          * Misc. Initialization.
7145          */
7146         result = isc_refcount_init(&rbtdb->references, 1);
7147         if (result != ISC_R_SUCCESS) {
7148                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7149                 return (result);
7150         }
7151         rbtdb->attributes = 0;
7152         rbtdb->task = NULL;
7153
7154         /*
7155          * Version Initialization.
7156          */
7157         rbtdb->current_serial = 1;
7158         rbtdb->least_serial = 1;
7159         rbtdb->next_serial = 2;
7160         rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7161         if (rbtdb->current_version == NULL) {
7162                 isc_refcount_decrement(&rbtdb->references, NULL);
7163                 isc_refcount_destroy(&rbtdb->references);
7164                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7165                 return (ISC_R_NOMEMORY);
7166         }
7167         rbtdb->current_version->secure = dns_db_insecure;
7168         rbtdb->current_version->havensec3 = ISC_FALSE;
7169         rbtdb->current_version->flags = 0;
7170         rbtdb->current_version->iterations = 0;
7171         rbtdb->current_version->hash = 0;
7172         rbtdb->current_version->salt_length = 0;
7173         memset(rbtdb->current_version->salt, 0,
7174                sizeof(rbtdb->current_version->salt));
7175         rbtdb->future_version = NULL;
7176         ISC_LIST_INIT(rbtdb->open_versions);
7177         /*
7178          * Keep the current version in the open list so that list operation
7179          * won't happen in normal lookup operations.
7180          */
7181         PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7182
7183         rbtdb->common.magic = DNS_DB_MAGIC;
7184         rbtdb->common.impmagic = RBTDB_MAGIC;
7185
7186         *dbp = (dns_db_t *)rbtdb;
7187
7188         return (ISC_R_SUCCESS);
7189
7190  cleanup_deadnodes:
7191         isc_mem_put(mctx, rbtdb->deadnodes,
7192                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7193
7194  cleanup_heaps:
7195         if (rbtdb->heaps != NULL) {
7196                 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7197                         if (rbtdb->heaps[i] != NULL)
7198                                 isc_heap_destroy(&rbtdb->heaps[i]);
7199                 isc_mem_put(mctx, rbtdb->heaps,
7200                             rbtdb->node_lock_count * sizeof(isc_heap_t *));
7201         }
7202
7203  cleanup_rdatasets:
7204         if (rbtdb->rdatasets != NULL)
7205                 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7206                             sizeof(rdatasetheaderlist_t));
7207  cleanup_rrsetstats:
7208         if (rbtdb->rrsetstats != NULL)
7209                 dns_stats_detach(&rbtdb->rrsetstats);
7210
7211  cleanup_node_locks:
7212         isc_mem_put(mctx, rbtdb->node_locks,
7213                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7214
7215  cleanup_tree_lock:
7216         isc_rwlock_destroy(&rbtdb->tree_lock);
7217
7218  cleanup_lock:
7219         RBTDB_DESTROYLOCK(&rbtdb->lock);
7220
7221  cleanup_rbtdb:
7222         isc_mem_put(mctx, rbtdb,  sizeof(*rbtdb));
7223         return (result);
7224 }
7225
7226
7227 /*
7228  * Slabbed Rdataset Methods
7229  */
7230
7231 static void
7232 rdataset_disassociate(dns_rdataset_t *rdataset) {
7233         dns_db_t *db = rdataset->private1;
7234         dns_dbnode_t *node = rdataset->private2;
7235
7236         detachnode(db, &node);
7237 }
7238
7239 static isc_result_t
7240 rdataset_first(dns_rdataset_t *rdataset) {
7241         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7242         unsigned int count;
7243
7244         count = raw[0] * 256 + raw[1];
7245         if (count == 0) {
7246                 rdataset->private5 = NULL;
7247                 return (ISC_R_NOMORE);
7248         }
7249
7250 #if DNS_RDATASET_FIXED
7251         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7252                 raw += 2 + (4 * count);
7253         else
7254 #endif
7255                 raw += 2;
7256
7257         /*
7258          * The privateuint4 field is the number of rdata beyond the
7259          * cursor position, so we decrement the total count by one
7260          * before storing it.
7261          *
7262          * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7263          * first record.  If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7264          * to the first entry in the offset table.
7265          */
7266         count--;
7267         rdataset->privateuint4 = count;
7268         rdataset->private5 = raw;
7269
7270         return (ISC_R_SUCCESS);
7271 }
7272
7273 static isc_result_t
7274 rdataset_next(dns_rdataset_t *rdataset) {
7275         unsigned int count;
7276         unsigned int length;
7277         unsigned char *raw;     /* RDATASLAB */
7278
7279         count = rdataset->privateuint4;
7280         if (count == 0)
7281                 return (ISC_R_NOMORE);
7282         count--;
7283         rdataset->privateuint4 = count;
7284
7285         /*
7286          * Skip forward one record (length + 4) or one offset (4).
7287          */
7288         raw = rdataset->private5;
7289 #if DNS_RDATASET_FIXED
7290         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7291 #endif
7292                 length = raw[0] * 256 + raw[1];
7293                 raw += length;
7294 #if DNS_RDATASET_FIXED
7295         }
7296         rdataset->private5 = raw + 4;           /* length(2) + order(2) */
7297 #else
7298         rdataset->private5 = raw + 2;           /* length(2) */
7299 #endif
7300
7301         return (ISC_R_SUCCESS);
7302 }
7303
7304 static void
7305 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7306         unsigned char *raw = rdataset->private5;        /* RDATASLAB */
7307 #if DNS_RDATASET_FIXED
7308         unsigned int offset;
7309 #endif
7310         unsigned int length;
7311         isc_region_t r;
7312         unsigned int flags = 0;
7313
7314         REQUIRE(raw != NULL);
7315
7316         /*
7317          * Find the start of the record if not already in private5
7318          * then skip the length and order fields.
7319          */
7320 #if DNS_RDATASET_FIXED
7321         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7322                 offset = (raw[0] << 24) + (raw[1] << 16) +
7323                          (raw[2] << 8) + raw[3];
7324                 raw = rdataset->private3;
7325                 raw += offset;
7326         }
7327 #endif
7328         length = raw[0] * 256 + raw[1];
7329 #if DNS_RDATASET_FIXED
7330         raw += 4;
7331 #else
7332         raw += 2;
7333 #endif
7334         if (rdataset->type == dns_rdatatype_rrsig) {
7335                 if (*raw & DNS_RDATASLAB_OFFLINE)
7336                         flags |= DNS_RDATA_OFFLINE;
7337                 length--;
7338                 raw++;
7339         }
7340         r.length = length;
7341         r.base = raw;
7342         dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7343         rdata->flags |= flags;
7344 }
7345
7346 static void
7347 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7348         dns_db_t *db = source->private1;
7349         dns_dbnode_t *node = source->private2;
7350         dns_dbnode_t *cloned_node = NULL;
7351
7352         attachnode(db, node, &cloned_node);
7353         *target = *source;
7354
7355         /*
7356          * Reset iterator state.
7357          */
7358         target->privateuint4 = 0;
7359         target->private5 = NULL;
7360 }
7361
7362 static unsigned int
7363 rdataset_count(dns_rdataset_t *rdataset) {
7364         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7365         unsigned int count;
7366
7367         count = raw[0] * 256 + raw[1];
7368
7369         return (count);
7370 }
7371
7372 static isc_result_t
7373 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7374                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7375 {
7376         dns_db_t *db = rdataset->private1;
7377         dns_dbnode_t *node = rdataset->private2;
7378         dns_dbnode_t *cloned_node;
7379         struct noqname *noqname = rdataset->private6;
7380
7381         cloned_node = NULL;
7382         attachnode(db, node, &cloned_node);
7383         nsec->methods = &rdataset_methods;
7384         nsec->rdclass = db->rdclass;
7385         nsec->type = noqname->type;
7386         nsec->covers = 0;
7387         nsec->ttl = rdataset->ttl;
7388         nsec->trust = rdataset->trust;
7389         nsec->private1 = rdataset->private1;
7390         nsec->private2 = rdataset->private2;
7391         nsec->private3 = noqname->neg;
7392         nsec->privateuint4 = 0;
7393         nsec->private5 = NULL;
7394         nsec->private6 = NULL;
7395         nsec->private7 = NULL;
7396
7397         cloned_node = NULL;
7398         attachnode(db, node, &cloned_node);
7399         nsecsig->methods = &rdataset_methods;
7400         nsecsig->rdclass = db->rdclass;
7401         nsecsig->type = dns_rdatatype_rrsig;
7402         nsecsig->covers = noqname->type;
7403         nsecsig->ttl = rdataset->ttl;
7404         nsecsig->trust = rdataset->trust;
7405         nsecsig->private1 = rdataset->private1;
7406         nsecsig->private2 = rdataset->private2;
7407         nsecsig->private3 = noqname->negsig;
7408         nsecsig->privateuint4 = 0;
7409         nsecsig->private5 = NULL;
7410         nsec->private6 = NULL;
7411         nsec->private7 = NULL;
7412
7413         dns_name_clone(&noqname->name, name);
7414
7415         return (ISC_R_SUCCESS);
7416 }
7417
7418 static isc_result_t
7419 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
7420                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7421 {
7422         dns_db_t *db = rdataset->private1;
7423         dns_dbnode_t *node = rdataset->private2;
7424         dns_dbnode_t *cloned_node;
7425         struct noqname *closest = rdataset->private7;
7426
7427         cloned_node = NULL;
7428         attachnode(db, node, &cloned_node);
7429         nsec->methods = &rdataset_methods;
7430         nsec->rdclass = db->rdclass;
7431         nsec->type = closest->type;
7432         nsec->covers = 0;
7433         nsec->ttl = rdataset->ttl;
7434         nsec->trust = rdataset->trust;
7435         nsec->private1 = rdataset->private1;
7436         nsec->private2 = rdataset->private2;
7437         nsec->private3 = closest->neg;
7438         nsec->privateuint4 = 0;
7439         nsec->private5 = NULL;
7440         nsec->private6 = NULL;
7441         nsec->private7 = NULL;
7442
7443         cloned_node = NULL;
7444         attachnode(db, node, &cloned_node);
7445         nsecsig->methods = &rdataset_methods;
7446         nsecsig->rdclass = db->rdclass;
7447         nsecsig->type = dns_rdatatype_rrsig;
7448         nsecsig->covers = closest->type;
7449         nsecsig->ttl = rdataset->ttl;
7450         nsecsig->trust = rdataset->trust;
7451         nsecsig->private1 = rdataset->private1;
7452         nsecsig->private2 = rdataset->private2;
7453         nsecsig->private3 = closest->negsig;
7454         nsecsig->privateuint4 = 0;
7455         nsecsig->private5 = NULL;
7456         nsec->private6 = NULL;
7457         nsec->private7 = NULL;
7458
7459         dns_name_clone(&closest->name, name);
7460
7461         return (ISC_R_SUCCESS);
7462 }
7463
7464 static void
7465 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
7466         dns_rbtdb_t *rbtdb = rdataset->private1;
7467         dns_rbtnode_t *rbtnode = rdataset->private2;
7468         rdatasetheader_t *header = rdataset->private3;
7469
7470         header--;
7471         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7472                   isc_rwlocktype_write);
7473         header->trust = rdataset->trust = trust;
7474         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7475                   isc_rwlocktype_write);
7476 }
7477
7478 static void
7479 rdataset_expire(dns_rdataset_t *rdataset) {
7480         dns_rbtdb_t *rbtdb = rdataset->private1;
7481         dns_rbtnode_t *rbtnode = rdataset->private2;
7482         rdatasetheader_t *header = rdataset->private3;
7483
7484         header--;
7485         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7486                   isc_rwlocktype_write);
7487         expire_header(rbtdb, header, ISC_FALSE);
7488         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7489                   isc_rwlocktype_write);
7490 }
7491
7492 /*
7493  * Rdataset Iterator Methods
7494  */
7495
7496 static void
7497 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
7498         rbtdb_rdatasetiter_t *rbtiterator;
7499
7500         rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
7501
7502         if (rbtiterator->common.version != NULL)
7503                 closeversion(rbtiterator->common.db,
7504                              &rbtiterator->common.version, ISC_FALSE);
7505         detachnode(rbtiterator->common.db, &rbtiterator->common.node);
7506         isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
7507                     sizeof(*rbtiterator));
7508
7509         *iteratorp = NULL;
7510 }
7511
7512 static isc_result_t
7513 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
7514         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7515         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7516         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7517         rbtdb_version_t *rbtversion = rbtiterator->common.version;
7518         rdatasetheader_t *header, *top_next;
7519         rbtdb_serial_t serial;
7520         isc_stdtime_t now;
7521
7522         if (IS_CACHE(rbtdb)) {
7523                 serial = 1;
7524                 now = rbtiterator->common.now;
7525         } else {
7526                 serial = rbtversion->serial;
7527                 now = 0;
7528         }
7529
7530         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7531                   isc_rwlocktype_read);
7532
7533         for (header = rbtnode->data; header != NULL; header = top_next) {
7534                 top_next = header->next;
7535                 do {
7536                         if (header->serial <= serial && !IGNORE(header)) {
7537                                 /*
7538                                  * Is this a "this rdataset doesn't exist"
7539                                  * record?  Or is it too old in the cache?
7540                                  *
7541                                  * Note: unlike everywhere else, we
7542                                  * check for now > header->rdh_ttl instead
7543                                  * of now >= header->rdh_ttl.  This allows
7544                                  * ANY and RRSIG queries for 0 TTL
7545                                  * rdatasets to work.
7546                                  */
7547                                 if (NONEXISTENT(header) ||
7548                                     (now != 0 && now > header->rdh_ttl))
7549                                         header = NULL;
7550                                 break;
7551                         } else
7552                                 header = header->down;
7553                 } while (header != NULL);
7554                 if (header != NULL)
7555                         break;
7556         }
7557
7558         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7559                     isc_rwlocktype_read);
7560
7561         rbtiterator->current = header;
7562
7563         if (header == NULL)
7564                 return (ISC_R_NOMORE);
7565
7566         return (ISC_R_SUCCESS);
7567 }
7568
7569 static isc_result_t
7570 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
7571         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7572         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7573         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7574         rbtdb_version_t *rbtversion = rbtiterator->common.version;
7575         rdatasetheader_t *header, *top_next;
7576         rbtdb_serial_t serial;
7577         isc_stdtime_t now;
7578         rbtdb_rdatatype_t type, negtype;
7579         dns_rdatatype_t rdtype, covers;
7580
7581         header = rbtiterator->current;
7582         if (header == NULL)
7583                 return (ISC_R_NOMORE);
7584
7585         if (IS_CACHE(rbtdb)) {
7586                 serial = 1;
7587                 now = rbtiterator->common.now;
7588         } else {
7589                 serial = rbtversion->serial;
7590                 now = 0;
7591         }
7592
7593         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7594                   isc_rwlocktype_read);
7595
7596         type = header->type;
7597         rdtype = RBTDB_RDATATYPE_BASE(header->type);
7598         if (rdtype == 0) {
7599                 covers = RBTDB_RDATATYPE_EXT(header->type);
7600                 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
7601         } else
7602                 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
7603         for (header = header->next; header != NULL; header = top_next) {
7604                 top_next = header->next;
7605                 /*
7606                  * If not walking back up the down list.
7607                  */
7608                 if (header->type != type && header->type != negtype) {
7609                         do {
7610                                 if (header->serial <= serial &&
7611                                     !IGNORE(header)) {
7612                                         /*
7613                                          * Is this a "this rdataset doesn't
7614                                          * exist" record?
7615                                          *
7616                                          * Note: unlike everywhere else, we
7617                                          * check for now > header->ttl instead
7618                                          * of now >= header->ttl.  This allows
7619                                          * ANY and RRSIG queries for 0 TTL
7620                                          * rdatasets to work.
7621                                          */
7622                                         if ((header->attributes &
7623                                              RDATASET_ATTR_NONEXISTENT) != 0 ||
7624                                             (now != 0 && now > header->rdh_ttl))
7625                                                 header = NULL;
7626                                         break;
7627                                 } else
7628                                         header = header->down;
7629                         } while (header != NULL);
7630                         if (header != NULL)
7631                                 break;
7632                 }
7633         }
7634
7635         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7636                     isc_rwlocktype_read);
7637
7638         rbtiterator->current = header;
7639
7640         if (header == NULL)
7641                 return (ISC_R_NOMORE);
7642
7643         return (ISC_R_SUCCESS);
7644 }
7645
7646 static void
7647 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
7648         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7649         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7650         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7651         rdatasetheader_t *header;
7652
7653         header = rbtiterator->current;
7654         REQUIRE(header != NULL);
7655
7656         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7657                   isc_rwlocktype_read);
7658
7659         bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
7660                       rdataset);
7661
7662         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7663                     isc_rwlocktype_read);
7664 }
7665
7666
7667 /*
7668  * Database Iterator Methods
7669  */
7670
7671 static inline void
7672 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7673         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7674         dns_rbtnode_t *node = rbtdbiter->node;
7675
7676         if (node == NULL)
7677                 return;
7678
7679         INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
7680         reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
7681 }
7682
7683 static inline void
7684 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7685         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7686         dns_rbtnode_t *node = rbtdbiter->node;
7687         nodelock_t *lock;
7688
7689         if (node == NULL)
7690                 return;
7691
7692         lock = &rbtdb->node_locks[node->locknum].lock;
7693         NODE_LOCK(lock, isc_rwlocktype_read);
7694         decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
7695                             rbtdbiter->tree_locked, ISC_FALSE);
7696         NODE_UNLOCK(lock, isc_rwlocktype_read);
7697
7698         rbtdbiter->node = NULL;
7699 }
7700
7701 static void
7702 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
7703         dns_rbtnode_t *node;
7704         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7705         isc_boolean_t was_read_locked = ISC_FALSE;
7706         nodelock_t *lock;
7707         int i;
7708
7709         if (rbtdbiter->delete != 0) {
7710                 /*
7711                  * Note that "%d node of %d in tree" can report things like
7712                  * "flush_deletions: 59 nodes of 41 in tree".  This means
7713                  * That some nodes appear on the deletions list more than
7714                  * once.  Only the last occurence will actually be deleted.
7715                  */
7716                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
7717                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
7718                               "flush_deletions: %d nodes of %d in tree",
7719                               rbtdbiter->delete,
7720                               dns_rbt_nodecount(rbtdb->tree));
7721
7722                 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7723                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7724                         was_read_locked = ISC_TRUE;
7725                 }
7726                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7727                 rbtdbiter->tree_locked = isc_rwlocktype_write;
7728
7729                 for (i = 0; i < rbtdbiter->delete; i++) {
7730                         node = rbtdbiter->deletions[i];
7731                         lock = &rbtdb->node_locks[node->locknum].lock;
7732
7733                         NODE_LOCK(lock, isc_rwlocktype_read);
7734                         decrement_reference(rbtdb, node, 0,
7735                                             isc_rwlocktype_read,
7736                                             rbtdbiter->tree_locked, ISC_FALSE);
7737                         NODE_UNLOCK(lock, isc_rwlocktype_read);
7738                 }
7739
7740                 rbtdbiter->delete = 0;
7741
7742                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7743                 if (was_read_locked) {
7744                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7745                         rbtdbiter->tree_locked = isc_rwlocktype_read;
7746
7747                 } else {
7748                         rbtdbiter->tree_locked = isc_rwlocktype_none;
7749                 }
7750         }
7751 }
7752
7753 static inline void
7754 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
7755         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7756
7757         REQUIRE(rbtdbiter->paused);
7758         REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
7759
7760         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7761         rbtdbiter->tree_locked = isc_rwlocktype_read;
7762
7763         rbtdbiter->paused = ISC_FALSE;
7764 }
7765
7766 static void
7767 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
7768         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
7769         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7770         dns_db_t *db = NULL;
7771
7772         if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7773                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7774                 rbtdbiter->tree_locked = isc_rwlocktype_none;
7775         } else
7776                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
7777
7778         dereference_iter_node(rbtdbiter);
7779
7780         flush_deletions(rbtdbiter);
7781
7782         dns_db_attach(rbtdbiter->common.db, &db);
7783         dns_db_detach(&rbtdbiter->common.db);
7784
7785         dns_rbtnodechain_reset(&rbtdbiter->chain);
7786         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7787         isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
7788         dns_db_detach(&db);
7789
7790         *iteratorp = NULL;
7791 }
7792
7793 static isc_result_t
7794 dbiterator_first(dns_dbiterator_t *iterator) {
7795         isc_result_t result;
7796         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7797         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7798         dns_name_t *name, *origin;
7799
7800         if (rbtdbiter->result != ISC_R_SUCCESS &&
7801             rbtdbiter->result != ISC_R_NOMORE)
7802                 return (rbtdbiter->result);
7803
7804         if (rbtdbiter->paused)
7805                 resume_iteration(rbtdbiter);
7806
7807         dereference_iter_node(rbtdbiter);
7808
7809         name = dns_fixedname_name(&rbtdbiter->name);
7810         origin = dns_fixedname_name(&rbtdbiter->origin);
7811         dns_rbtnodechain_reset(&rbtdbiter->chain);
7812         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7813
7814         if (rbtdbiter->nsec3only) {
7815                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7816                 result = dns_rbtnodechain_first(rbtdbiter->current,
7817                                                 rbtdb->nsec3, name, origin);
7818         } else {
7819                 rbtdbiter->current = &rbtdbiter->chain;
7820                 result = dns_rbtnodechain_first(rbtdbiter->current,
7821                                                 rbtdb->tree, name, origin);
7822                 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
7823                         rbtdbiter->current = &rbtdbiter->nsec3chain;
7824                         result = dns_rbtnodechain_first(rbtdbiter->current,
7825                                                         rbtdb->nsec3, name,
7826                                                         origin);
7827                 }
7828         }
7829         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7830                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7831                                                   NULL, &rbtdbiter->node);
7832                 if (result == ISC_R_SUCCESS) {
7833                         rbtdbiter->new_origin = ISC_TRUE;
7834                         reference_iter_node(rbtdbiter);
7835                 }
7836         } else {
7837                 INSIST(result == ISC_R_NOTFOUND);
7838                 result = ISC_R_NOMORE; /* The tree is empty. */
7839         }
7840
7841         rbtdbiter->result = result;
7842
7843         return (result);
7844 }
7845
7846 static isc_result_t
7847 dbiterator_last(dns_dbiterator_t *iterator) {
7848         isc_result_t result;
7849         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7850         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7851         dns_name_t *name, *origin;
7852
7853         if (rbtdbiter->result != ISC_R_SUCCESS &&
7854             rbtdbiter->result != ISC_R_NOMORE)
7855                 return (rbtdbiter->result);
7856
7857         if (rbtdbiter->paused)
7858                 resume_iteration(rbtdbiter);
7859
7860         dereference_iter_node(rbtdbiter);
7861
7862         name = dns_fixedname_name(&rbtdbiter->name);
7863         origin = dns_fixedname_name(&rbtdbiter->origin);
7864         dns_rbtnodechain_reset(&rbtdbiter->chain);
7865         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7866
7867         result = ISC_R_NOTFOUND;
7868         if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
7869                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7870                 result = dns_rbtnodechain_last(rbtdbiter->current,
7871                                                rbtdb->nsec3, name, origin);
7872         }
7873         if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
7874                 rbtdbiter->current = &rbtdbiter->chain;
7875                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7876                                                name, origin);
7877         }
7878         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7879                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7880                                                   NULL, &rbtdbiter->node);
7881                 if (result == ISC_R_SUCCESS) {
7882                         rbtdbiter->new_origin = ISC_TRUE;
7883                         reference_iter_node(rbtdbiter);
7884                 }
7885         } else {
7886                 INSIST(result == ISC_R_NOTFOUND);
7887                 result = ISC_R_NOMORE; /* The tree is empty. */
7888         }
7889
7890         rbtdbiter->result = result;
7891
7892         return (result);
7893 }
7894
7895 static isc_result_t
7896 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
7897         isc_result_t result;
7898         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7899         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7900         dns_name_t *iname, *origin;
7901
7902         if (rbtdbiter->result != ISC_R_SUCCESS &&
7903             rbtdbiter->result != ISC_R_NOTFOUND &&
7904             rbtdbiter->result != ISC_R_NOMORE)
7905                 return (rbtdbiter->result);
7906
7907         if (rbtdbiter->paused)
7908                 resume_iteration(rbtdbiter);
7909
7910         dereference_iter_node(rbtdbiter);
7911
7912         iname = dns_fixedname_name(&rbtdbiter->name);
7913         origin = dns_fixedname_name(&rbtdbiter->origin);
7914         dns_rbtnodechain_reset(&rbtdbiter->chain);
7915         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7916
7917         if (rbtdbiter->nsec3only) {
7918                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7919                 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7920                                           &rbtdbiter->node,
7921                                           rbtdbiter->current,
7922                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7923         } else if (rbtdbiter->nonsec3) {
7924                 rbtdbiter->current = &rbtdbiter->chain;
7925                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7926                                           &rbtdbiter->node,
7927                                           rbtdbiter->current,
7928                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7929         } else {
7930                 /*
7931                  * Stay on main chain if not found on either chain.
7932                  */
7933                 rbtdbiter->current = &rbtdbiter->chain;
7934                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7935                                           &rbtdbiter->node,
7936                                           rbtdbiter->current,
7937                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7938                 if (result == DNS_R_PARTIALMATCH) {
7939                         dns_rbtnode_t *node = NULL;
7940                         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7941                                                   &node, &rbtdbiter->nsec3chain,
7942                                                   DNS_RBTFIND_EMPTYDATA,
7943                                                   NULL, NULL);
7944                         if (result == ISC_R_SUCCESS) {
7945                                 rbtdbiter->node = node;
7946                                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7947                         }
7948                 }
7949         }
7950
7951 #if 1
7952         if (result == ISC_R_SUCCESS) {
7953                 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
7954                                                   origin, NULL);
7955                 if (result == ISC_R_SUCCESS) {
7956                         rbtdbiter->new_origin = ISC_TRUE;
7957                         reference_iter_node(rbtdbiter);
7958                 }
7959         } else if (result == DNS_R_PARTIALMATCH) {
7960                 result = ISC_R_NOTFOUND;
7961                 rbtdbiter->node = NULL;
7962         }
7963
7964         rbtdbiter->result = result;
7965 #else
7966         if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
7967                 isc_result_t tresult;
7968                 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
7969                                                    origin, NULL);
7970                 if (tresult == ISC_R_SUCCESS) {
7971                         rbtdbiter->new_origin = ISC_TRUE;
7972                         reference_iter_node(rbtdbiter);
7973                 } else {
7974                         result = tresult;
7975                         rbtdbiter->node = NULL;
7976                 }
7977         } else
7978                 rbtdbiter->node = NULL;
7979
7980         rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
7981                             ISC_R_SUCCESS : result;
7982 #endif
7983
7984         return (result);
7985 }
7986
7987 static isc_result_t
7988 dbiterator_prev(dns_dbiterator_t *iterator) {
7989         isc_result_t result;
7990         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7991         dns_name_t *name, *origin;
7992         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7993
7994         REQUIRE(rbtdbiter->node != NULL);
7995
7996         if (rbtdbiter->result != ISC_R_SUCCESS)
7997                 return (rbtdbiter->result);
7998
7999         if (rbtdbiter->paused)
8000                 resume_iteration(rbtdbiter);
8001
8002         name = dns_fixedname_name(&rbtdbiter->name);
8003         origin = dns_fixedname_name(&rbtdbiter->origin);
8004         result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
8005         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8006             !rbtdbiter->nonsec3 &&
8007             &rbtdbiter->nsec3chain == rbtdbiter->current) {
8008                 rbtdbiter->current = &rbtdbiter->chain;
8009                 dns_rbtnodechain_reset(rbtdbiter->current);
8010                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8011                                                name, origin);
8012                 if (result == ISC_R_NOTFOUND)
8013                         result = ISC_R_NOMORE;
8014         }
8015
8016         dereference_iter_node(rbtdbiter);
8017
8018         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8019                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8020                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8021                                                   NULL, &rbtdbiter->node);
8022         }
8023
8024         if (result == ISC_R_SUCCESS)
8025                 reference_iter_node(rbtdbiter);
8026
8027         rbtdbiter->result = result;
8028
8029         return (result);
8030 }
8031
8032 static isc_result_t
8033 dbiterator_next(dns_dbiterator_t *iterator) {
8034         isc_result_t result;
8035         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8036         dns_name_t *name, *origin;
8037         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8038
8039         REQUIRE(rbtdbiter->node != NULL);
8040
8041         if (rbtdbiter->result != ISC_R_SUCCESS)
8042                 return (rbtdbiter->result);
8043
8044         if (rbtdbiter->paused)
8045                 resume_iteration(rbtdbiter);
8046
8047         name = dns_fixedname_name(&rbtdbiter->name);
8048         origin = dns_fixedname_name(&rbtdbiter->origin);
8049         result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
8050         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8051             !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
8052                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8053                 dns_rbtnodechain_reset(rbtdbiter->current);
8054                 result = dns_rbtnodechain_first(rbtdbiter->current,
8055                                                 rbtdb->nsec3, name, origin);
8056                 if (result == ISC_R_NOTFOUND)
8057                         result = ISC_R_NOMORE;
8058         }
8059
8060         dereference_iter_node(rbtdbiter);
8061
8062         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8063                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8064                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8065                                                   NULL, &rbtdbiter->node);
8066         }
8067         if (result == ISC_R_SUCCESS)
8068                 reference_iter_node(rbtdbiter);
8069
8070         rbtdbiter->result = result;
8071
8072         return (result);
8073 }
8074
8075 static isc_result_t
8076 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
8077                    dns_name_t *name)
8078 {
8079         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8080         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8081         dns_rbtnode_t *node = rbtdbiter->node;
8082         isc_result_t result;
8083         dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8084         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8085
8086         REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8087         REQUIRE(rbtdbiter->node != NULL);
8088
8089         if (rbtdbiter->paused)
8090                 resume_iteration(rbtdbiter);
8091
8092         if (name != NULL) {
8093                 if (rbtdbiter->common.relative_names)
8094                         origin = NULL;
8095                 result = dns_name_concatenate(nodename, origin, name, NULL);
8096                 if (result != ISC_R_SUCCESS)
8097                         return (result);
8098                 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8099                         result = DNS_R_NEWORIGIN;
8100         } else
8101                 result = ISC_R_SUCCESS;
8102
8103         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8104         new_reference(rbtdb, node);
8105         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8106
8107         *nodep = rbtdbiter->node;
8108
8109         if (iterator->cleaning && result == ISC_R_SUCCESS) {
8110                 isc_result_t expire_result;
8111
8112                 /*
8113                  * If the deletion array is full, flush it before trying
8114                  * to expire the current node.  The current node can't
8115                  * fully deleted while the iteration cursor is still on it.
8116                  */
8117                 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8118                         flush_deletions(rbtdbiter);
8119
8120                 expire_result = expirenode(iterator->db, *nodep, 0);
8121
8122                 /*
8123                  * expirenode() currently always returns success.
8124                  */
8125                 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8126                         unsigned int refs;
8127
8128                         rbtdbiter->deletions[rbtdbiter->delete++] = node;
8129                         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8130                         dns_rbtnode_refincrement(node, &refs);
8131                         INSIST(refs != 0);
8132                         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8133                 }
8134         }
8135
8136         return (result);
8137 }
8138
8139 static isc_result_t
8140 dbiterator_pause(dns_dbiterator_t *iterator) {
8141         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8142         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8143
8144         if (rbtdbiter->result != ISC_R_SUCCESS &&
8145             rbtdbiter->result != ISC_R_NOMORE)
8146                 return (rbtdbiter->result);
8147
8148         if (rbtdbiter->paused)
8149                 return (ISC_R_SUCCESS);
8150
8151         rbtdbiter->paused = ISC_TRUE;
8152
8153         if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8154                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8155                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8156                 rbtdbiter->tree_locked = isc_rwlocktype_none;
8157         }
8158
8159         flush_deletions(rbtdbiter);
8160
8161         return (ISC_R_SUCCESS);
8162 }
8163
8164 static isc_result_t
8165 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8166         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8167         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8168
8169         if (rbtdbiter->result != ISC_R_SUCCESS)
8170                 return (rbtdbiter->result);
8171
8172         return (dns_name_copy(origin, name, NULL));
8173 }
8174
8175 /*%
8176  * Additional cache routines.
8177  */
8178 static isc_result_t
8179 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8180                        dns_rdatatype_t qtype, dns_acache_t *acache,
8181                        dns_zone_t **zonep, dns_db_t **dbp,
8182                        dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8183                        dns_name_t *fname, dns_message_t *msg,
8184                        isc_stdtime_t now)
8185 {
8186         dns_rbtdb_t *rbtdb = rdataset->private1;
8187         dns_rbtnode_t *rbtnode = rdataset->private2;
8188         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8189         unsigned int current_count = rdataset->privateuint4;
8190         unsigned int count;
8191         rdatasetheader_t *header;
8192         nodelock_t *nodelock;
8193         unsigned int total_count;
8194         acachectl_t *acarray;
8195         dns_acacheentry_t *entry;
8196         isc_result_t result;
8197
8198         UNUSED(qtype); /* we do not use this value at least for now */
8199         UNUSED(acache);
8200
8201         header = (struct rdatasetheader *)(raw - sizeof(*header));
8202
8203         total_count = raw[0] * 256 + raw[1];
8204         INSIST(total_count > current_count);
8205         count = total_count - current_count - 1;
8206
8207         acarray = NULL;
8208
8209         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8210         NODE_LOCK(nodelock, isc_rwlocktype_read);
8211
8212         switch (type) {
8213         case dns_rdatasetadditional_fromauth:
8214                 acarray = header->additional_auth;
8215                 break;
8216         case dns_rdatasetadditional_fromcache:
8217                 acarray = NULL;
8218                 break;
8219         case dns_rdatasetadditional_fromglue:
8220                 acarray = header->additional_glue;
8221                 break;
8222         default:
8223                 INSIST(0);
8224         }
8225
8226         if (acarray == NULL) {
8227                 if (type != dns_rdatasetadditional_fromcache)
8228                         dns_acache_countquerymiss(acache);
8229                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8230                 return (ISC_R_NOTFOUND);
8231         }
8232
8233         if (acarray[count].entry == NULL) {
8234                 dns_acache_countquerymiss(acache);
8235                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8236                 return (ISC_R_NOTFOUND);
8237         }
8238
8239         entry = NULL;
8240         dns_acache_attachentry(acarray[count].entry, &entry);
8241
8242         NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8243
8244         result = dns_acache_getentry(entry, zonep, dbp, versionp,
8245                                      nodep, fname, msg, now);
8246
8247         dns_acache_detachentry(&entry);
8248
8249         return (result);
8250 }
8251
8252 static void
8253 acache_callback(dns_acacheentry_t *entry, void **arg) {
8254         dns_rbtdb_t *rbtdb;
8255         dns_rbtnode_t *rbtnode;
8256         nodelock_t *nodelock;
8257         acachectl_t *acarray = NULL;
8258         acache_cbarg_t *cbarg;
8259         unsigned int count;
8260
8261         REQUIRE(arg != NULL);
8262         cbarg = *arg;
8263
8264         /*
8265          * The caller must hold the entry lock.
8266          */
8267
8268         rbtdb = (dns_rbtdb_t *)cbarg->db;
8269         rbtnode = (dns_rbtnode_t *)cbarg->node;
8270
8271         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8272         NODE_LOCK(nodelock, isc_rwlocktype_write);
8273
8274         switch (cbarg->type) {
8275         case dns_rdatasetadditional_fromauth:
8276                 acarray = cbarg->header->additional_auth;
8277                 break;
8278         case dns_rdatasetadditional_fromglue:
8279                 acarray = cbarg->header->additional_glue;
8280                 break;
8281         default:
8282                 INSIST(0);
8283         }
8284
8285         count = cbarg->count;
8286         if (acarray != NULL && acarray[count].entry == entry) {
8287                 acarray[count].entry = NULL;
8288                 INSIST(acarray[count].cbarg == cbarg);
8289                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8290                 acarray[count].cbarg = NULL;
8291         } else
8292                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8293
8294         dns_acache_detachentry(&entry);
8295
8296         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8297
8298         dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8299         dns_db_detach((dns_db_t **)(void*)&rbtdb);
8300
8301         *arg = NULL;
8302 }
8303
8304 static void
8305 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
8306                       acache_cbarg_t **cbargp)
8307 {
8308         acache_cbarg_t *cbarg;
8309
8310         REQUIRE(mctx != NULL);
8311         REQUIRE(entry != NULL);
8312         REQUIRE(cbargp != NULL && *cbargp != NULL);
8313
8314         cbarg = *cbargp;
8315
8316         dns_acache_cancelentry(entry);
8317         dns_db_detachnode(cbarg->db, &cbarg->node);
8318         dns_db_detach(&cbarg->db);
8319
8320         isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
8321
8322         *cbargp = NULL;
8323 }
8324
8325 static isc_result_t
8326 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8327                        dns_rdatatype_t qtype, dns_acache_t *acache,
8328                        dns_zone_t *zone, dns_db_t *db,
8329                        dns_dbversion_t *version, dns_dbnode_t *node,
8330                        dns_name_t *fname)
8331 {
8332         dns_rbtdb_t *rbtdb = rdataset->private1;
8333         dns_rbtnode_t *rbtnode = rdataset->private2;
8334         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8335         unsigned int current_count = rdataset->privateuint4;
8336         rdatasetheader_t *header;
8337         unsigned int total_count, count;
8338         nodelock_t *nodelock;
8339         isc_result_t result;
8340         acachectl_t *acarray;
8341         dns_acacheentry_t *newentry, *oldentry = NULL;
8342         acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8343
8344         UNUSED(qtype);
8345
8346         if (type == dns_rdatasetadditional_fromcache)
8347                 return (ISC_R_SUCCESS);
8348
8349         header = (struct rdatasetheader *)(raw - sizeof(*header));
8350
8351         total_count = raw[0] * 256 + raw[1];
8352         INSIST(total_count > current_count);
8353         count = total_count - current_count - 1; /* should be private data */
8354
8355         newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8356         if (newcbarg == NULL)
8357                 return (ISC_R_NOMEMORY);
8358         newcbarg->type = type;
8359         newcbarg->count = count;
8360         newcbarg->header = header;
8361         newcbarg->db = NULL;
8362         dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8363         newcbarg->node = NULL;
8364         dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8365                           &newcbarg->node);
8366         newentry = NULL;
8367         result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8368                                         acache_callback, newcbarg, &newentry);
8369         if (result != ISC_R_SUCCESS)
8370                 goto fail;
8371         /* Set cache data in the new entry. */
8372         result = dns_acache_setentry(acache, newentry, zone, db,
8373                                      version, node, fname);
8374         if (result != ISC_R_SUCCESS)
8375                 goto fail;
8376
8377         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8378         NODE_LOCK(nodelock, isc_rwlocktype_write);
8379
8380         acarray = NULL;
8381         switch (type) {
8382         case dns_rdatasetadditional_fromauth:
8383                 acarray = header->additional_auth;
8384                 break;
8385         case dns_rdatasetadditional_fromglue:
8386                 acarray = header->additional_glue;
8387                 break;
8388         default:
8389                 INSIST(0);
8390         }
8391
8392         if (acarray == NULL) {
8393                 unsigned int i;
8394
8395                 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
8396                                       sizeof(acachectl_t));
8397
8398                 if (acarray == NULL) {
8399                         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8400                         goto fail;
8401                 }
8402
8403                 for (i = 0; i < total_count; i++) {
8404                         acarray[i].entry = NULL;
8405                         acarray[i].cbarg = NULL;
8406                 }
8407         }
8408         switch (type) {
8409         case dns_rdatasetadditional_fromauth:
8410                 header->additional_auth = acarray;
8411                 break;
8412         case dns_rdatasetadditional_fromglue:
8413                 header->additional_glue = acarray;
8414                 break;
8415         default:
8416                 INSIST(0);
8417         }
8418
8419         if (acarray[count].entry != NULL) {
8420                 /*
8421                  * Swap the entry.  Delay cleaning-up the old entry since
8422                  * it would require a node lock.
8423                  */
8424                 oldentry = acarray[count].entry;
8425                 INSIST(acarray[count].cbarg != NULL);
8426                 oldcbarg = acarray[count].cbarg;
8427         }
8428         acarray[count].entry = newentry;
8429         acarray[count].cbarg = newcbarg;
8430
8431         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8432
8433         if (oldentry != NULL) {
8434                 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
8435                 dns_acache_detachentry(&oldentry);
8436         }
8437
8438         return (ISC_R_SUCCESS);
8439
8440  fail:
8441         if (newcbarg != NULL) {
8442                 if (newentry != NULL) {
8443                         acache_cancelentry(rbtdb->common.mctx, newentry,
8444                                            &newcbarg);
8445                         dns_acache_detachentry(&newentry);
8446                 } else {
8447                         dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
8448                         dns_db_detach(&newcbarg->db);
8449                         isc_mem_put(rbtdb->common.mctx, newcbarg,
8450                             sizeof(*newcbarg));
8451                 }
8452         }
8453
8454         return (result);
8455 }
8456
8457 static isc_result_t
8458 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
8459                        dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
8460 {
8461         dns_rbtdb_t *rbtdb = rdataset->private1;
8462         dns_rbtnode_t *rbtnode = rdataset->private2;
8463         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8464         unsigned int current_count = rdataset->privateuint4;
8465         rdatasetheader_t *header;
8466         nodelock_t *nodelock;
8467         unsigned int total_count, count;
8468         acachectl_t *acarray;
8469         dns_acacheentry_t *entry;
8470         acache_cbarg_t *cbarg;
8471
8472         UNUSED(qtype);          /* we do not use this value at least for now */
8473         UNUSED(acache);
8474
8475         if (type == dns_rdatasetadditional_fromcache)
8476                 return (ISC_R_SUCCESS);
8477
8478         header = (struct rdatasetheader *)(raw - sizeof(*header));
8479
8480         total_count = raw[0] * 256 + raw[1];
8481         INSIST(total_count > current_count);
8482         count = total_count - current_count - 1;
8483
8484         acarray = NULL;
8485         entry = NULL;
8486
8487         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8488         NODE_LOCK(nodelock, isc_rwlocktype_write);
8489
8490         switch (type) {
8491         case dns_rdatasetadditional_fromauth:
8492                 acarray = header->additional_auth;
8493                 break;
8494         case dns_rdatasetadditional_fromglue:
8495                 acarray = header->additional_glue;
8496                 break;
8497         default:
8498                 INSIST(0);
8499         }
8500
8501         if (acarray == NULL) {
8502                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8503                 return (ISC_R_NOTFOUND);
8504         }
8505
8506         entry = acarray[count].entry;
8507         if (entry == NULL) {
8508                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8509                 return (ISC_R_NOTFOUND);
8510         }
8511
8512         acarray[count].entry = NULL;
8513         cbarg = acarray[count].cbarg;
8514         acarray[count].cbarg = NULL;
8515
8516         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8517
8518         if (entry != NULL) {
8519                 if (cbarg != NULL)
8520                         acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
8521                 dns_acache_detachentry(&entry);
8522         }
8523
8524         return (ISC_R_SUCCESS);
8525 }
8526
8527 /*%
8528  * Routines for LRU-based cache management.
8529  */
8530
8531 /*%
8532  * See if a given cache entry that is being reused needs to be updated
8533  * in the LRU-list.  From the LRU management point of view, this function is
8534  * expected to return true for almost all cases.  When used with threads,
8535  * however, this may cause a non-negligible performance penalty because a
8536  * writer lock will have to be acquired before updating the list.
8537  * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
8538  * function returns true if the entry has not been updated for some period of
8539  * time.  We differentiate the NS or glue address case and the others since
8540  * experiments have shown that the former tends to be accessed relatively
8541  * infrequently and the cost of cache miss is higher (e.g., a missing NS records
8542  * may cause external queries at a higher level zone, involving more
8543  * transactions).
8544  *
8545  * Caller must hold the node (read or write) lock.
8546  */
8547 static inline isc_boolean_t
8548 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
8549         if ((header->attributes &
8550              (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
8551                 return (ISC_FALSE);
8552
8553 #if DNS_RBTDB_LIMITLRUUPDATE
8554         if (header->type == dns_rdatatype_ns ||
8555             (header->trust == dns_trust_glue &&
8556              (header->type == dns_rdatatype_a ||
8557               header->type == dns_rdatatype_aaaa))) {
8558                 /*
8559                  * Glue records are updated if at least 60 seconds have passed
8560                  * since the previous update time.
8561                  */
8562                 return (header->last_used + 60 <= now);
8563         }
8564
8565         /* Other records are updated if 5 minutes have passed. */
8566         return (header->last_used + 300 <= now);
8567 #else
8568         UNUSED(now);
8569
8570         return (ISC_TRUE);
8571 #endif
8572 }
8573
8574 /*%
8575  * Update the timestamp of a given cache entry and move it to the head
8576  * of the corresponding LRU list.
8577  *
8578  * Caller must hold the node (write) lock.
8579  *
8580  * Note that the we do NOT touch the heap here, as the TTL has not changed.
8581  */
8582 static void
8583 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8584               isc_stdtime_t now)
8585 {
8586         INSIST(IS_CACHE(rbtdb));
8587
8588         /* To be checked: can we really assume this? XXXMLG */
8589         INSIST(ISC_LINK_LINKED(header, link));
8590
8591         ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
8592         header->last_used = now;
8593         ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
8594 }
8595
8596 /*%
8597  * Purge some expired and/or stale (i.e. unused for some period) cache entries
8598  * under an overmem condition.  To recover from this condition quickly, up to
8599  * 2 entries will be purged.  This process is triggered while adding a new
8600  * entry, and we specifically avoid purging entries in the same LRU bucket as
8601  * the one to which the new entry will belong.  Otherwise, we might purge
8602  * entries of the same name of different RR types while adding RRsets from a
8603  * single response (consider the case where we're adding A and AAAA glue records
8604  * of the same NS name).
8605  */
8606 static void
8607 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
8608               isc_stdtime_t now, isc_boolean_t tree_locked)
8609 {
8610         rdatasetheader_t *header, *header_prev;
8611         unsigned int locknum;
8612         int purgecount = 2;
8613
8614         for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
8615              locknum != locknum_start && purgecount > 0;
8616              locknum = (locknum + 1) % rbtdb->node_lock_count) {
8617                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
8618                           isc_rwlocktype_write);
8619
8620                 header = isc_heap_element(rbtdb->heaps[locknum], 1);
8621                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
8622                         expire_header(rbtdb, header, tree_locked);
8623                         purgecount--;
8624                 }
8625
8626                 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
8627                      header != NULL && purgecount > 0;
8628                      header = header_prev) {
8629                         header_prev = ISC_LIST_PREV(header, link);
8630                         /*
8631                          * Unlink the entry at this point to avoid checking it
8632                          * again even if it's currently used someone else and
8633                          * cannot be purged at this moment.  This entry won't be
8634                          * referenced any more (so unlinking is safe) since the
8635                          * TTL was reset to 0.
8636                          */
8637                         ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
8638                                         link);
8639                         expire_header(rbtdb, header, tree_locked);
8640                         purgecount--;
8641                 }
8642
8643                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
8644                                     isc_rwlocktype_write);
8645         }
8646 }
8647
8648 static void
8649 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8650               isc_boolean_t tree_locked)
8651 {
8652         set_ttl(rbtdb, header, 0);
8653         header->attributes |= RDATASET_ATTR_STALE;
8654         header->node->dirty = 1;
8655
8656         /*
8657          * Caller must hold the node (write) lock.
8658          */
8659
8660         if (dns_rbtnode_refcurrent(header->node) == 0) {
8661                 /*
8662                  * If no one else is using the node, we can clean it up now.
8663                  * We first need to gain a new reference to the node to meet a
8664                  * requirement of decrement_reference().
8665                  */
8666                 new_reference(rbtdb, header->node);
8667                 decrement_reference(rbtdb, header->node, 0,
8668                                     isc_rwlocktype_write,
8669                                     tree_locked ? isc_rwlocktype_write :
8670                                     isc_rwlocktype_none, ISC_FALSE);
8671         }
8672 }