]> CyberLeo.Net >> Repos - FreeBSD/stable/8.git/blob - contrib/bind9/lib/dns/rbtdb.c
Update to version 9.6-ESV-R5 which contains various bug fixes
[FreeBSD/stable/8.git] / contrib / bind9 / lib / dns / rbtdb.c
1 /*
2  * Copyright (C) 2004-2011  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id: rbtdb.c,v 1.270.12.32 2011-06-09 00:16:35 each Exp $ */
19
20 /*! \file */
21
22 /*
23  * Principal Author: Bob Halley
24  */
25
26 #include <config.h>
27
28 /* #define inline */
29
30 #include <isc/event.h>
31 #include <isc/heap.h>
32 #include <isc/mem.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
41 #include <isc/task.h>
42 #include <isc/time.h>
43 #include <isc/util.h>
44
45 #include <dns/acache.h>
46 #include <dns/db.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
50 #include <dns/lib.h>
51 #include <dns/log.h>
52 #include <dns/masterdump.h>
53 #include <dns/nsec.h>
54 #include <dns/nsec3.h>
55 #include <dns/rbt.h>
56 #include <dns/rdata.h>
57 #include <dns/rdataset.h>
58 #include <dns/rdatasetiter.h>
59 #include <dns/rdataslab.h>
60 #include <dns/rdatastruct.h>
61 #include <dns/result.h>
62 #include <dns/stats.h>
63 #include <dns/view.h>
64 #include <dns/zone.h>
65 #include <dns/zonekey.h>
66
67 #ifdef DNS_RBTDB_VERSION64
68 #include "rbtdb64.h"
69 #else
70 #include "rbtdb.h"
71 #endif
72
73 #ifdef DNS_RBTDB_VERSION64
74 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '8')
75 #else
76 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '4')
77 #endif
78
79 /*%
80  * Note that "impmagic" is not the first four bytes of the struct, so
81  * ISC_MAGIC_VALID cannot be used.
82  */
83 #define VALID_RBTDB(rbtdb)      ((rbtdb) != NULL && \
84                                  (rbtdb)->common.impmagic == RBTDB_MAGIC)
85
86 #ifdef DNS_RBTDB_VERSION64
87 typedef isc_uint64_t                    rbtdb_serial_t;
88 /*%
89  * Make casting easier in symbolic debuggers by using different names
90  * for the 64 bit version.
91  */
92 #define dns_rbtdb_t dns_rbtdb64_t
93 #define rdatasetheader_t rdatasetheader64_t
94 #define rbtdb_version_t rbtdb_version64_t
95 #else
96 typedef isc_uint32_t                    rbtdb_serial_t;
97 #endif
98
99 typedef isc_uint32_t                    rbtdb_rdatatype_t;
100
/*
 * An rbtdb_rdatatype_t carries two rdata types in one integer: the "base"
 * type in the low 16 bits and the "ext" type in the bits above them.
 *
 * RBTDB_RDATATYPE_VALUE() widens both operands to rbtdb_rdatatype_t before
 * combining them.  Shifting the narrow operand directly would first promote
 * it to (signed) int, and an 'e' of 0x8000 or more would then overflow the
 * shift.
 */
#define RBTDB_RDATATYPE_BASE(type)      ((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)       ((dns_rdatatype_t)((type) >> 16))
#define RBTDB_RDATATYPE_VALUE(b, e) \
        ((rbtdb_rdatatype_t)(((rbtdb_rdatatype_t)(e) << 16) | \
                             (rbtdb_rdatatype_t)(b)))
104
105 #define RBTDB_RDATATYPE_SIGNSEC \
106                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
107 #define RBTDB_RDATATYPE_SIGNSEC3 \
108                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
109 #define RBTDB_RDATATYPE_SIGNS \
110                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
111 #define RBTDB_RDATATYPE_SIGCNAME \
112                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
113 #define RBTDB_RDATATYPE_SIGDNAME \
114                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
115 #define RBTDB_RDATATYPE_NCACHEANY \
116                 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
117
118 /*
119  * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
120  * Using rwlock is effective with regard to lookup performance only when
121  * it is implemented in an efficient way.
122  * Otherwise, it is generally wise to stick to the simple locking since rwlock
123  * would require more memory or can even make lookups slower due to its own
124  * overhead (when it internally calls mutex locks).
125  */
126 #ifdef ISC_RWLOCK_USEATOMIC
127 #define DNS_RBTDB_USERWLOCK 1
128 #else
129 #define DNS_RBTDB_USERWLOCK 0
130 #endif
131
132 #if DNS_RBTDB_USERWLOCK
133 #define RBTDB_INITLOCK(l)       isc_rwlock_init((l), 0, 0)
134 #define RBTDB_DESTROYLOCK(l)    isc_rwlock_destroy(l)
135 #define RBTDB_LOCK(l, t)        RWLOCK((l), (t))
136 #define RBTDB_UNLOCK(l, t)      RWUNLOCK((l), (t))
137 #else
138 #define RBTDB_INITLOCK(l)       isc_mutex_init(l)
139 #define RBTDB_DESTROYLOCK(l)    DESTROYLOCK(l)
140 #define RBTDB_LOCK(l, t)        LOCK(l)
141 #define RBTDB_UNLOCK(l, t)      UNLOCK(l)
142 #endif
143
144 /*
145  * Since node locking is sensitive to both performance and memory footprint,
146  * we need some trick here.  If we have both high-performance rwlock and
147  * high performance and small-memory reference counters, we use rwlock for
148  * node lock and isc_refcount for node references.  In this case, we don't have
149  * to protect the access to the counters by locks.
150  * Otherwise, we simply use ordinary mutex lock for node locking, and use
151  * simple integers as reference counters which is protected by the lock.
152  * In most cases, we can simply use wrapper macros such as NODE_LOCK and
153  * NODE_UNLOCK.  In some other cases, however, we need to protect reference
154  * counters first and then protect other parts of a node as read-only data.
155  * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
156  * provided for these special cases.  When we can use the efficient backend
157  * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
158  * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
159  * section including the access to the reference counter.
160  * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
161  * section is also protected by NODE_STRONGLOCK().
162  */
163 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
164 typedef isc_rwlock_t nodelock_t;
165
166 #define NODE_INITLOCK(l)        isc_rwlock_init((l), 0, 0)
167 #define NODE_DESTROYLOCK(l)     isc_rwlock_destroy(l)
168 #define NODE_LOCK(l, t)         RWLOCK((l), (t))
169 #define NODE_UNLOCK(l, t)       RWUNLOCK((l), (t))
170 #define NODE_TRYUPGRADE(l)      isc_rwlock_tryupgrade(l)
171
172 #define NODE_STRONGLOCK(l)      ((void)0)
173 #define NODE_STRONGUNLOCK(l)    ((void)0)
174 #define NODE_WEAKLOCK(l, t)     NODE_LOCK(l, t)
175 #define NODE_WEAKUNLOCK(l, t)   NODE_UNLOCK(l, t)
176 #define NODE_WEAKDOWNGRADE(l)   isc_rwlock_downgrade(l)
177 #else
178 typedef isc_mutex_t nodelock_t;
179
180 #define NODE_INITLOCK(l)        isc_mutex_init(l)
181 #define NODE_DESTROYLOCK(l)     DESTROYLOCK(l)
182 #define NODE_LOCK(l, t)         LOCK(l)
183 #define NODE_UNLOCK(l, t)       UNLOCK(l)
184 #define NODE_TRYUPGRADE(l)      ISC_R_SUCCESS
185
186 #define NODE_STRONGLOCK(l)      LOCK(l)
187 #define NODE_STRONGUNLOCK(l)    UNLOCK(l)
188 #define NODE_WEAKLOCK(l, t)     ((void)0)
189 #define NODE_WEAKUNLOCK(l, t)   ((void)0)
190 #define NODE_WEAKDOWNGRADE(l)   ((void)0)
191 #endif
192
193 /*%
194  * Whether to rate-limit updating the LRU to avoid possible thread contention.
195  * Our performance measurement has shown the cost is marginal, so it's defined
196  * to be 0 by default either with or without threads.
197  */
198 #ifndef DNS_RBTDB_LIMITLRUUPDATE
199 #define DNS_RBTDB_LIMITLRUUPDATE 0
200 #endif
201
/*
 * Allow clients with a virtual time of up to 5 minutes in the past to see
 * records that would otherwise have expired.
 */
206 #define RBTDB_VIRTUAL 300
207
208 struct noqname {
209         dns_name_t      name;
210         void *          neg;
211         void *          negsig;
212         dns_rdatatype_t type;
213 };
214
215 typedef struct acachectl acachectl_t;
216
/*
 * Per-rdataset bookkeeping header.  Headers are chained to each other
 * through 'next' and 'down' (see below) and point back at their node
 * through 'node'.
 */
typedef struct rdatasetheader {
        /*%
         * Locked by the owning node's lock.
         */
        rbtdb_serial_t                  serial;
        /* Ordering key used by ttl_sooner(); always changed via set_ttl(). */
        dns_ttl_t                       rdh_ttl;
        /* Packed type; see RBTDB_RDATATYPE_BASE() / RBTDB_RDATATYPE_EXT(). */
        rbtdb_rdatatype_t               type;
        /* Bitwise OR of RDATASET_ATTR_* flags. */
        isc_uint16_t                    attributes;
        dns_trust_t                     trust;
        struct noqname                  *noqname;
        struct noqname                  *closest;

        /*
         * The 'next' and 'down' links are kept as plain pointers.
         * We don't use the LIST macros, because the LIST structure has
         * both head and tail pointers, and is doubly linked.
         */

        struct rdatasetheader           *next;
        /*%<
         * If this is the top header for an rdataset, 'next' points
         * to the top header for the next rdataset (i.e., the next type).
         * Otherwise, it points up to the header whose down pointer points
         * at this header.
         */

        struct rdatasetheader           *down;
        /*%<
         * Points to the header for the next older version of
         * this rdataset.
         */

        isc_uint32_t                    count;
        /*%<
         * Monotonically increased every time this rdataset is bound so that
         * it is used as the base of the starting point in DNS responses
         * when the "cyclic" rrset-order is required.  Since the ordering
         * should not be so crucial, no lock is set for the counter for
         * performance reasons.
         */

        acachectl_t                     *additional_auth;
        acachectl_t                     *additional_glue;

        /* Owning node; set_ttl() uses node->locknum to pick the heap. */
        dns_rbtnode_t                   *node;
        isc_stdtime_t                   last_used;
        ISC_LINK(struct rdatasetheader) link;

        unsigned int                    heap_index;
        /*%<
         * Position of this header in its heap, as stored by set_index().
         * set_ttl() does no heap adjustment when this is 0.  The heaps
         * are used for TTL-based cache cleaning (or for zone resigning
         * in a zone DB).
         */
        /* Ordering key used by resign_sooner(). */
        isc_stdtime_t                   resign;
} rdatasetheader_t;
269
270 typedef ISC_LIST(rdatasetheader_t)      rdatasetheaderlist_t;
271 typedef ISC_LIST(dns_rbtnode_t)         rbtnodelist_t;
272
273 #define RDATASET_ATTR_NONEXISTENT       0x0001
274 #define RDATASET_ATTR_STALE             0x0002
275 #define RDATASET_ATTR_IGNORE            0x0004
276 #define RDATASET_ATTR_RETAIN            0x0008
277 #define RDATASET_ATTR_NXDOMAIN          0x0010
278 #define RDATASET_ATTR_RESIGN            0x0020
279 #define RDATASET_ATTR_STATCOUNT         0x0040
280 #define RDATASET_ATTR_OPTOUT            0x0080
281 #define RDATASET_ATTR_NEGATIVE          0x0100
282
283 typedef struct acache_cbarg {
284         dns_rdatasetadditional_t        type;
285         unsigned int                    count;
286         dns_db_t                        *db;
287         dns_dbnode_t                    *node;
288         rdatasetheader_t                *header;
289 } acache_cbarg_t;
290
291 struct acachectl {
292         dns_acacheentry_t               *entry;
293         acache_cbarg_t                  *cbarg;
294 };
295
/*
 * XXX
 * When the cache pre-expires data (because memory is low or for some
 * other reason) before the rdataset's TTL has expired, it MUST
 * respect the RETAIN bit and not expire the data until its TTL has
 * expired.
 */
303
304 #undef IGNORE                   /* WIN32 winbase.h defines this. */
305
/*
 * Attribute predicates for a rdatasetheader_t pointer.  Each one tests a
 * single RDATASET_ATTR_* bit in header->attributes and evaluates to 0 or 1.
 */
#define RDATASET_ATTR_ISSET(header, mask) \
        (((header)->attributes & (mask)) != 0)

#define EXISTS(header) \
        (!RDATASET_ATTR_ISSET(header, RDATASET_ATTR_NONEXISTENT))
#define NONEXISTENT(header) \
        RDATASET_ATTR_ISSET(header, RDATASET_ATTR_NONEXISTENT)
#define IGNORE(header) \
        RDATASET_ATTR_ISSET(header, RDATASET_ATTR_IGNORE)
#define RETAIN(header) \
        RDATASET_ATTR_ISSET(header, RDATASET_ATTR_RETAIN)
#define NXDOMAIN(header) \
        RDATASET_ATTR_ISSET(header, RDATASET_ATTR_NXDOMAIN)
#define RESIGN(header) \
        RDATASET_ATTR_ISSET(header, RDATASET_ATTR_RESIGN)
#define OPTOUT(header) \
        RDATASET_ATTR_ISSET(header, RDATASET_ATTR_OPTOUT)
#define NEGATIVE(header) \
        RDATASET_ATTR_ISSET(header, RDATASET_ATTR_NEGATIVE)
322
323 #define DEFAULT_NODE_LOCK_COUNT         7       /*%< Should be prime. */
324
/*%
 * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
 * Choosing this value involves a tradeoff: if it is too small, it may
 * cause heavier contention between threads; if it is too large, the
 * LRU purge algorithm won't work well (entries tend to be purged
 * prematurely).  The default value should work well for most environments,
 * but it can also be configured at compile time by defining
 * DNS_RBTDB_CACHE_NODE_LOCK_COUNT.  This value must be larger than
 * 1 due to an assumption made by overmem_purge().
 */
335 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
336 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
337 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
338 #else
339 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
340 #endif
341 #else
342 #define DEFAULT_CACHE_NODE_LOCK_COUNT   16
343 #endif  /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
344
345 typedef struct {
346         nodelock_t                      lock;
347         /* Protected in the refcount routines. */
348         isc_refcount_t                  references;
349         /* Locked by lock. */
350         isc_boolean_t                   exiting;
351 } rbtdb_nodelock_t;
352
353 typedef struct rbtdb_changed {
354         dns_rbtnode_t *                 node;
355         isc_boolean_t                   dirty;
356         ISC_LINK(struct rbtdb_changed)  link;
357 } rbtdb_changed_t;
358
359 typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
360
361 typedef enum {
362         dns_db_insecure,
363         dns_db_partial,
364         dns_db_secure
365 } dns_db_secure_t;
366
367 typedef struct rbtdb_version {
368         /* Not locked */
369         rbtdb_serial_t                  serial;
370         /*
371          * Protected in the refcount routines.
372          * XXXJT: should we change the lock policy based on the refcount
373          * performance?
374          */
375         isc_refcount_t                  references;
376         /* Locked by database lock. */
377         isc_boolean_t                   writer;
378         isc_boolean_t                   commit_ok;
379         rbtdb_changedlist_t             changed_list;
380         rdatasetheaderlist_t            resigned_list;
381         ISC_LINK(struct rbtdb_version)  link;
382         dns_db_secure_t                 secure;
383         isc_boolean_t                   havensec3;
384         /* NSEC3 parameters */
385         dns_hash_t                      hash;
386         isc_uint8_t                     flags;
387         isc_uint16_t                    iterations;
388         isc_uint8_t                     salt_length;
389         unsigned char                   salt[DNS_NSEC3_SALTSIZE];
390 } rbtdb_version_t;
391
392 typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;
393
394 typedef struct {
395         /* Unlocked. */
396         dns_db_t                        common;
397         /* Locks the data in this struct */
398 #if DNS_RBTDB_USERWLOCK
399         isc_rwlock_t                    lock;
400 #else
401         isc_mutex_t                     lock;
402 #endif
403         /* Locks the tree structure (prevents nodes appearing/disappearing) */
404         isc_rwlock_t                    tree_lock;
405         /* Locks for individual tree nodes */
406         unsigned int                    node_lock_count;
407         rbtdb_nodelock_t *              node_locks;
408         dns_rbtnode_t *                 origin_node;
409         dns_stats_t *                   rrsetstats; /* cache DB only */
410         /* Locked by lock. */
411         unsigned int                    active;
412         isc_refcount_t                  references;
413         unsigned int                    attributes;
414         rbtdb_serial_t                  current_serial;
415         rbtdb_serial_t                  least_serial;
416         rbtdb_serial_t                  next_serial;
417         rbtdb_version_t *               current_version;
418         rbtdb_version_t *               future_version;
419         rbtdb_versionlist_t             open_versions;
420         isc_task_t *                    task;
421         dns_dbnode_t                    *soanode;
422         dns_dbnode_t                    *nsnode;
423
424         /*
425          * This is a linked list used to implement the LRU cache.  There will
426          * be node_lock_count linked lists here.  Nodes in bucket 1 will be
427          * placed on the linked list rdatasets[1].
428          */
429         rdatasetheaderlist_t            *rdatasets;
430
431         /*%
432          * Temporary storage for stale cache nodes and dynamically deleted
433          * nodes that await being cleaned up.
434          */
435         rbtnodelist_t                   *deadnodes;
436
437         /*
438          * Heaps.  These are used for TTL based expiry in a cache,
439          * or for zone resigning in a zone DB.  hmctx is the memory
440          * context to use for the heap (which differs from the main
441          * database memory context in the case of a cache).
442          */
443         isc_mem_t *                     hmctx;
444         isc_heap_t                      **heaps;
445
446         /* Locked by tree_lock. */
447         dns_rbt_t *                     tree;
448         dns_rbt_t *                     nsec3;
449
450         /* Unlocked */
451         unsigned int                    quantum;
452 } dns_rbtdb_t;
453
454 #define RBTDB_ATTR_LOADED               0x01
455 #define RBTDB_ATTR_LOADING              0x02
456
457 /*%
458  * Search Context
459  */
460 typedef struct {
461         dns_rbtdb_t *           rbtdb;
462         rbtdb_version_t *       rbtversion;
463         rbtdb_serial_t          serial;
464         unsigned int            options;
465         dns_rbtnodechain_t      chain;
466         isc_boolean_t           copy_name;
467         isc_boolean_t           need_cleanup;
468         isc_boolean_t           wild;
469         dns_rbtnode_t *         zonecut;
470         rdatasetheader_t *      zonecut_rdataset;
471         rdatasetheader_t *      zonecut_sigrdataset;
472         dns_fixedname_t         zonecut_name;
473         isc_stdtime_t           now;
474 } rbtdb_search_t;
475
476 /*%
477  * Load Context
478  */
479 typedef struct {
480         dns_rbtdb_t *           rbtdb;
481         isc_stdtime_t           now;
482 } rbtdb_load_t;
483
484 static void rdataset_disassociate(dns_rdataset_t *rdataset);
485 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
486 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
487 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
488 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
489 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
490 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
491                                         dns_name_t *name,
492                                         dns_rdataset_t *neg,
493                                         dns_rdataset_t *negsig);
494 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
495                                         dns_name_t *name,
496                                         dns_rdataset_t *neg,
497                                         dns_rdataset_t *negsig);
498 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
499                                            dns_rdatasetadditional_t type,
500                                            dns_rdatatype_t qtype,
501                                            dns_acache_t *acache,
502                                            dns_zone_t **zonep,
503                                            dns_db_t **dbp,
504                                            dns_dbversion_t **versionp,
505                                            dns_dbnode_t **nodep,
506                                            dns_name_t *fname,
507                                            dns_message_t *msg,
508                                            isc_stdtime_t now);
509 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
510                                            dns_rdatasetadditional_t type,
511                                            dns_rdatatype_t qtype,
512                                            dns_acache_t *acache,
513                                            dns_zone_t *zone,
514                                            dns_db_t *db,
515                                            dns_dbversion_t *version,
516                                            dns_dbnode_t *node,
517                                            dns_name_t *fname);
518 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
519                                            dns_rdataset_t *rdataset,
520                                            dns_rdatasetadditional_t type,
521                                            dns_rdatatype_t qtype);
522 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
523                                               isc_stdtime_t now);
524 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
525                           isc_stdtime_t now);
526 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
527                           isc_boolean_t tree_locked);
528 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
529                           isc_stdtime_t now, isc_boolean_t tree_locked);
530 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
531                                   rdatasetheader_t *newheader);
532 static void prune_tree(isc_task_t *task, isc_event_t *event);
533 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
534 static void rdataset_expire(dns_rdataset_t *rdataset);
535
536 static dns_rdatasetmethods_t rdataset_methods = {
537         rdataset_disassociate,
538         rdataset_first,
539         rdataset_next,
540         rdataset_current,
541         rdataset_clone,
542         rdataset_count,
543         NULL,
544         rdataset_getnoqname,
545         NULL,
546         rdataset_getclosest,
547         rdataset_getadditional,
548         rdataset_setadditional,
549         rdataset_putadditional,
550         rdataset_settrust,
551         rdataset_expire
552 };
553
554 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
555 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
556 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
557 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
558                                  dns_rdataset_t *rdataset);
559
560 static dns_rdatasetitermethods_t rdatasetiter_methods = {
561         rdatasetiter_destroy,
562         rdatasetiter_first,
563         rdatasetiter_next,
564         rdatasetiter_current
565 };
566
567 typedef struct rbtdb_rdatasetiter {
568         dns_rdatasetiter_t              common;
569         rdatasetheader_t *              current;
570 } rbtdb_rdatasetiter_t;
571
572 static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
573 static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
574 static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
575 static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
576                                         dns_name_t *name);
577 static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
578 static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
579 static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
580                                            dns_dbnode_t **nodep,
581                                            dns_name_t *name);
582 static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
583 static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
584                                           dns_name_t *name);
585
586 static dns_dbiteratormethods_t dbiterator_methods = {
587         dbiterator_destroy,
588         dbiterator_first,
589         dbiterator_last,
590         dbiterator_seek,
591         dbiterator_prev,
592         dbiterator_next,
593         dbiterator_current,
594         dbiterator_pause,
595         dbiterator_origin
596 };
597
/* Capacity of the 'deletions' array in rbtdb_dbiterator_t. */
#define DELETION_BATCH_MAX 64

/*
 * Database iterator state; 'common' is the generic dns_dbiterator_t part.
 *
 * If 'paused' is ISC_TRUE, then the tree lock is not being held.
 */
typedef struct rbtdb_dbiterator {
        dns_dbiterator_t                common;
        isc_boolean_t                   paused;
        isc_boolean_t                   new_origin;
        /* NOTE(review): presumably the mode in which tree_lock is held. */
        isc_rwlocktype_t                tree_locked;
        isc_result_t                    result;
        dns_fixedname_t                 name;
        dns_fixedname_t                 origin;
        /*
         * NOTE(review): 'current' presumably points at whichever of
         * 'chain' / 'nsec3chain' is being walked -- confirm in the
         * iterator methods.
         */
        dns_rbtnodechain_t              chain;
        dns_rbtnodechain_t              nsec3chain;
        dns_rbtnodechain_t              *current;
        dns_rbtnode_t                   *node;
        /*
         * Room for up to DELETION_BATCH_MAX node pointers.
         * NOTE(review): 'delete' looks like the number of slots in use --
         * confirm against the code that fills 'deletions'.
         */
        dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX];
        int                             delete;
        isc_boolean_t                   nsec3only;
        isc_boolean_t                   nonsec3;
} rbtdb_dbiterator_t;
620
621
622 #define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
623 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
624
625 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
626                        isc_event_t *event);
627 static void overmem(dns_db_t *db, isc_boolean_t overmem);
628 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
629                                isc_boolean_t *nsec3createflag);
630
/*%
 * 'init_count' is used to initialize 'newheader->count', which in turn
 * is used to determine where in the cycle rrset-order cyclic starts.
 * We don't lock this as we don't care about simultaneous updates.
 *
 * Note:
 *      Both init_count and header->count can be ISC_UINT32_MAX.
 *      The count on the returned rdataset, however, can't be, as
 *      that value indicates that the database does not implement
 *      cyclic processing.
 */
642 static unsigned int init_count;
643
644 /*
645  * Locking
646  *
647  * If a routine is going to lock more than one lock in this module, then
648  * the locking must be done in the following order:
649  *
650  *      Tree Lock
651  *
652  *      Node Lock       (Only one from the set may be locked at one time by
653  *                       any caller)
654  *
655  *      Database Lock
656  *
657  * Failure to follow this hierarchy can result in deadlock.
658  */
659
660 /*
661  * Deleting Nodes
662  *
663  * For zone databases the node for the origin of the zone MUST NOT be deleted.
664  */
665
666
667 /*
668  * DB Routines
669  */
670
671 static void
672 attach(dns_db_t *source, dns_db_t **targetp) {
673         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
674
675         REQUIRE(VALID_RBTDB(rbtdb));
676
677         isc_refcount_increment(&rbtdb->references, NULL);
678
679         *targetp = source;
680 }
681
682 static void
683 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
684         dns_rbtdb_t *rbtdb = event->ev_arg;
685
686         UNUSED(task);
687
688         free_rbtdb(rbtdb, ISC_TRUE, event);
689 }
690
691 static void
692 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
693                   isc_boolean_t increment)
694 {
695         dns_rdatastatstype_t statattributes = 0;
696         dns_rdatastatstype_t base = 0;
697         dns_rdatastatstype_t type;
698
699         /* At the moment we count statistics only for cache DB */
700         INSIST(IS_CACHE(rbtdb));
701
702         if (NEGATIVE(header)) {
703                 if (NXDOMAIN(header))
704                         statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
705                 else {
706                         statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
707                         base = RBTDB_RDATATYPE_EXT(header->type);
708                 }
709         } else
710                 base = RBTDB_RDATATYPE_BASE(header->type);
711
712         type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
713         if (increment)
714                 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
715         else
716                 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
717 }
718
719 static void
720 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
721         int idx;
722         isc_heap_t *heap;
723         dns_ttl_t oldttl;
724
725         oldttl = header->rdh_ttl;
726         header->rdh_ttl = newttl;
727
728         if (!IS_CACHE(rbtdb))
729                 return;
730
731         /*
732          * It's possible the rbtdb is not a cache.  If this is the case,
733          * we will not have a heap, and we move on.  If we do, though,
734          * we might need to adjust things.
735          */
736         if (header->heap_index == 0 || newttl == oldttl)
737                 return;
738         idx = header->node->locknum;
739         if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
740             return;
741         heap = rbtdb->heaps[idx];
742
743         if (newttl < oldttl)
744                 isc_heap_increased(heap, header->heap_index);
745         else
746                 isc_heap_decreased(heap, header->heap_index);
747 }
748
/*%
 * These functions allow the heap code to rank the priority of each
 * element.  Each returns ISC_TRUE if v1 happens "sooner" than v2.
 */
753 static isc_boolean_t
754 ttl_sooner(void *v1, void *v2) {
755         rdatasetheader_t *h1 = v1;
756         rdatasetheader_t *h2 = v2;
757
758         if (h1->rdh_ttl < h2->rdh_ttl)
759                 return (ISC_TRUE);
760         return (ISC_FALSE);
761 }
762
763 static isc_boolean_t
764 resign_sooner(void *v1, void *v2) {
765         rdatasetheader_t *h1 = v1;
766         rdatasetheader_t *h2 = v2;
767
768         if (h1->resign < h2->resign)
769                 return (ISC_TRUE);
770         return (ISC_FALSE);
771 }
772
773 /*%
774  * This function sets the heap index into the header.
775  */
776 static void
777 set_index(void *what, unsigned int index) {
778         rdatasetheader_t *h = what;
779
780         h->heap_index = index;
781 }
782
783 /*%
784  * Work out how many nodes can be deleted in the time between two
785  * requests to the nameserver.  Smooth the resulting number and use it
786  * as a estimate for the number of nodes to be deleted in the next
787  * iteration.
788  */
789 static unsigned int
790 adjust_quantum(unsigned int old, isc_time_t *start) {
791         unsigned int pps = dns_pps;     /* packets per second */
792         unsigned int interval;
793         isc_uint64_t usecs;
794         isc_time_t end;
795         unsigned int new;
796
797         if (pps < 100)
798                 pps = 100;
799         isc_time_now(&end);
800
801         interval = 1000000 / pps;       /* interval in usec */
802         if (interval == 0)
803                 interval = 1;
804         usecs = isc_time_microdiff(&end, start);
805         if (usecs == 0) {
806                 /*
807                  * We were unable to measure the amount of time taken.
808                  * Double the nodes deleted next time.
809                  */
810                 old *= 2;
811                 if (old > 1000)
812                         old = 1000;
813                 return (old);
814         }
815         new = old * interval;
816         new /= (unsigned int)usecs;
817         if (new == 0)
818                 new = 1;
819         else if (new > 1000)
820                 new = 1000;
821
822         /* Smooth */
823         new = (new + old * 3) / 4;
824
825         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
826                       ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
827
828         return (new);
829 }
830
831 static void
832 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
833         unsigned int i;
834         isc_ondestroy_t ondest;
835         isc_result_t result;
836         char buf[DNS_NAME_FORMATSIZE];
837         isc_time_t start;
838
839         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
840                 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
841
842         REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
843         REQUIRE(rbtdb->future_version == NULL);
844
845         if (rbtdb->current_version != NULL) {
846                 unsigned int refs;
847
848                 isc_refcount_decrement(&rbtdb->current_version->references,
849                                        &refs);
850                 INSIST(refs == 0);
851                 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
852                 isc_refcount_destroy(&rbtdb->current_version->references);
853                 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
854                             sizeof(rbtdb_version_t));
855         }
856
857         /*
858          * We assume the number of remaining dead nodes is reasonably small;
859          * the overhead of unlinking all nodes here should be negligible.
860          */
861         for (i = 0; i < rbtdb->node_lock_count; i++) {
862                 dns_rbtnode_t *node;
863
864                 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
865                 while (node != NULL) {
866                         ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
867                         node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
868                 }
869         }
870
871         if (event == NULL)
872                 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
873  again:
874         if (rbtdb->tree != NULL) {
875                 isc_time_now(&start);
876                 result = dns_rbt_destroy2(&rbtdb->tree, rbtdb->quantum);
877                 if (result == ISC_R_QUOTA) {
878                         INSIST(rbtdb->task != NULL);
879                         if (rbtdb->quantum != 0)
880                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
881                                                                 &start);
882                         if (event == NULL)
883                                 event = isc_event_allocate(rbtdb->common.mctx,
884                                                            NULL,
885                                                          DNS_EVENT_FREESTORAGE,
886                                                            free_rbtdb_callback,
887                                                            rbtdb,
888                                                            sizeof(isc_event_t));
889                         if (event == NULL)
890                                 goto again;
891                         isc_task_send(rbtdb->task, &event);
892                         return;
893                 }
894                 INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
895         }
896
897         if (rbtdb->nsec3 != NULL) {
898                 isc_time_now(&start);
899                 result = dns_rbt_destroy2(&rbtdb->nsec3, rbtdb->quantum);
900                 if (result == ISC_R_QUOTA) {
901                         INSIST(rbtdb->task != NULL);
902                         if (rbtdb->quantum != 0)
903                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
904                                                                 &start);
905                         if (event == NULL)
906                                 event = isc_event_allocate(rbtdb->common.mctx,
907                                                            NULL,
908                                                          DNS_EVENT_FREESTORAGE,
909                                                            free_rbtdb_callback,
910                                                            rbtdb,
911                                                            sizeof(isc_event_t));
912                         if (event == NULL)
913                                 goto again;
914                         isc_task_send(rbtdb->task, &event);
915                         return;
916                 }
917                 INSIST(result == ISC_R_SUCCESS && rbtdb->nsec3 == NULL);
918         }
919
920         if (event != NULL)
921                 isc_event_free(&event);
922         if (log) {
923                 if (dns_name_dynamic(&rbtdb->common.origin))
924                         dns_name_format(&rbtdb->common.origin, buf,
925                                         sizeof(buf));
926                 else
927                         strcpy(buf, "<UNKNOWN>");
928                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
929                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
930                               "done free_rbtdb(%s)", buf);
931         }
932         if (dns_name_dynamic(&rbtdb->common.origin))
933                 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
934         for (i = 0; i < rbtdb->node_lock_count; i++) {
935                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
936                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
937         }
938
939         /*
940          * Clean up LRU / re-signing order lists.
941          */
942         if (rbtdb->rdatasets != NULL) {
943                 for (i = 0; i < rbtdb->node_lock_count; i++)
944                         INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
945                 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
946                             rbtdb->node_lock_count *
947                             sizeof(rdatasetheaderlist_t));
948         }
949         /*
950          * Clean up dead node buckets.
951          */
952         if (rbtdb->deadnodes != NULL) {
953                 for (i = 0; i < rbtdb->node_lock_count; i++)
954                         INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
955                 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
956                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
957         }
958         /*
959          * Clean up heap objects.
960          */
961         if (rbtdb->heaps != NULL) {
962                 for (i = 0; i < rbtdb->node_lock_count; i++)
963                         isc_heap_destroy(&rbtdb->heaps[i]);
964                 isc_mem_put(rbtdb->hmctx, rbtdb->heaps,
965                             rbtdb->node_lock_count * sizeof(isc_heap_t *));
966         }
967
968         if (rbtdb->rrsetstats != NULL)
969                 dns_stats_detach(&rbtdb->rrsetstats);
970
971         isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
972                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
973         isc_rwlock_destroy(&rbtdb->tree_lock);
974         isc_refcount_destroy(&rbtdb->references);
975         if (rbtdb->task != NULL)
976                 isc_task_detach(&rbtdb->task);
977
978         RBTDB_DESTROYLOCK(&rbtdb->lock);
979         rbtdb->common.magic = 0;
980         rbtdb->common.impmagic = 0;
981         ondest = rbtdb->common.ondest;
982         isc_mem_detach(&rbtdb->hmctx);
983         isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
984         isc_ondestroy_notify(&ondest, rbtdb);
985 }
986
987 static inline void
988 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
989         isc_boolean_t want_free = ISC_FALSE;
990         unsigned int i;
991         unsigned int inactive = 0;
992
993         /* XXX check for open versions here */
994
995         if (rbtdb->soanode != NULL)
996                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
997         if (rbtdb->nsnode != NULL)
998                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
999
1000         /*
1001          * Even though there are no external direct references, there still
1002          * may be nodes in use.
1003          */
1004         for (i = 0; i < rbtdb->node_lock_count; i++) {
1005                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1006                 rbtdb->node_locks[i].exiting = ISC_TRUE;
1007                 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1008                 if (isc_refcount_current(&rbtdb->node_locks[i].references)
1009                     == 0) {
1010                         inactive++;
1011                 }
1012         }
1013
1014         if (inactive != 0) {
1015                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1016                 rbtdb->active -= inactive;
1017                 if (rbtdb->active == 0)
1018                         want_free = ISC_TRUE;
1019                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1020                 if (want_free) {
1021                         char buf[DNS_NAME_FORMATSIZE];
1022                         if (dns_name_dynamic(&rbtdb->common.origin))
1023                                 dns_name_format(&rbtdb->common.origin, buf,
1024                                                 sizeof(buf));
1025                         else
1026                                 strcpy(buf, "<UNKNOWN>");
1027                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1028                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1029                                       "calling free_rbtdb(%s)", buf);
1030                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
1031                 }
1032         }
1033 }
1034
1035 static void
1036 detach(dns_db_t **dbp) {
1037         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1038         unsigned int refs;
1039
1040         REQUIRE(VALID_RBTDB(rbtdb));
1041
1042         isc_refcount_decrement(&rbtdb->references, &refs);
1043
1044         if (refs == 0)
1045                 maybe_free_rbtdb(rbtdb);
1046
1047         *dbp = NULL;
1048 }
1049
1050 static void
1051 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1052         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1053         rbtdb_version_t *version;
1054         unsigned int refs;
1055
1056         REQUIRE(VALID_RBTDB(rbtdb));
1057
1058         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1059         version = rbtdb->current_version;
1060         isc_refcount_increment(&version->references, &refs);
1061         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1062
1063         *versionp = (dns_dbversion_t *)version;
1064 }
1065
1066 static inline rbtdb_version_t *
1067 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1068                  unsigned int references, isc_boolean_t writer)
1069 {
1070         isc_result_t result;
1071         rbtdb_version_t *version;
1072
1073         version = isc_mem_get(mctx, sizeof(*version));
1074         if (version == NULL)
1075                 return (NULL);
1076         version->serial = serial;
1077         result = isc_refcount_init(&version->references, references);
1078         if (result != ISC_R_SUCCESS) {
1079                 isc_mem_put(mctx, version, sizeof(*version));
1080                 return (NULL);
1081         }
1082         version->writer = writer;
1083         version->commit_ok = ISC_FALSE;
1084         ISC_LIST_INIT(version->changed_list);
1085         ISC_LIST_INIT(version->resigned_list);
1086         ISC_LINK_INIT(version, link);
1087
1088         return (version);
1089 }
1090
1091 static isc_result_t
1092 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1093         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1094         rbtdb_version_t *version;
1095
1096         REQUIRE(VALID_RBTDB(rbtdb));
1097         REQUIRE(versionp != NULL && *versionp == NULL);
1098         REQUIRE(rbtdb->future_version == NULL);
1099
1100         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1101         RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
1102         version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1103                                    ISC_TRUE);
1104         if (version != NULL) {
1105                 version->commit_ok = ISC_TRUE;
1106                 version->secure = rbtdb->current_version->secure;
1107                 version->havensec3 = rbtdb->current_version->havensec3;
1108                 if (version->havensec3) {
1109                         version->flags = rbtdb->current_version->flags;
1110                         version->iterations =
1111                                 rbtdb->current_version->iterations;
1112                         version->hash = rbtdb->current_version->hash;
1113                         version->salt_length =
1114                                 rbtdb->current_version->salt_length;
1115                         memcpy(version->salt, rbtdb->current_version->salt,
1116                                version->salt_length);
1117                 } else {
1118                         version->flags = 0;
1119                         version->iterations = 0;
1120                         version->hash = 0;
1121                         version->salt_length = 0;
1122                         memset(version->salt, 0, sizeof(version->salt));
1123                 }
1124                 rbtdb->next_serial++;
1125                 rbtdb->future_version = version;
1126         }
1127         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1128
1129         if (version == NULL)
1130                 return (ISC_R_NOMEMORY);
1131
1132         *versionp = version;
1133
1134         return (ISC_R_SUCCESS);
1135 }
1136
1137 static void
1138 attachversion(dns_db_t *db, dns_dbversion_t *source,
1139               dns_dbversion_t **targetp)
1140 {
1141         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1142         rbtdb_version_t *rbtversion = source;
1143         unsigned int refs;
1144
1145         REQUIRE(VALID_RBTDB(rbtdb));
1146
1147         isc_refcount_increment(&rbtversion->references, &refs);
1148         INSIST(refs > 1);
1149
1150         *targetp = rbtversion;
1151 }
1152
1153 static rbtdb_changed_t *
1154 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1155             dns_rbtnode_t *node)
1156 {
1157         rbtdb_changed_t *changed;
1158         unsigned int refs;
1159
1160         /*
1161          * Caller must be holding the node lock if its reference must be
1162          * protected by the lock.
1163          */
1164
1165         changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1166
1167         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1168
1169         REQUIRE(version->writer);
1170
1171         if (changed != NULL) {
1172                 dns_rbtnode_refincrement(node, &refs);
1173                 INSIST(refs != 0);
1174                 changed->node = node;
1175                 changed->dirty = ISC_FALSE;
1176                 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1177         } else
1178                 version->commit_ok = ISC_FALSE;
1179
1180         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1181
1182         return (changed);
1183 }
1184
1185 static void
1186 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1187                  acachectl_t *array)
1188 {
1189         unsigned int count;
1190         unsigned int i;
1191         unsigned char *raw;     /* RDATASLAB */
1192
1193         /*
1194          * The caller must be holding the corresponding node lock.
1195          */
1196
1197         if (array == NULL)
1198                 return;
1199
1200         raw = (unsigned char *)header + sizeof(*header);
1201         count = raw[0] * 256 + raw[1];
1202
1203         /*
1204          * Sanity check: since an additional cache entry has a reference to
1205          * the original DB node (in the callback arg), there should be no
1206          * acache entries when the node can be freed.
1207          */
1208         for (i = 0; i < count; i++)
1209                 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1210
1211         isc_mem_put(mctx, array, count * sizeof(acachectl_t));
1212 }
1213
1214 static inline void
1215 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1216
1217         if (dns_name_dynamic(&(*noqname)->name))
1218                 dns_name_free(&(*noqname)->name, mctx);
1219         if ((*noqname)->neg != NULL)
1220                 isc_mem_put(mctx, (*noqname)->neg,
1221                             dns_rdataslab_size((*noqname)->neg, 0));
1222         if ((*noqname)->negsig != NULL)
1223                 isc_mem_put(mctx, (*noqname)->negsig,
1224                             dns_rdataslab_size((*noqname)->negsig, 0));
1225         isc_mem_put(mctx, *noqname, sizeof(**noqname));
1226         *noqname = NULL;
1227 }
1228
1229 static inline void
1230 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1231 {
1232         ISC_LINK_INIT(h, link);
1233         h->heap_index = 0;
1234
1235 #if TRACE_HEADER
1236         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1237                 fprintf(stderr, "initialized header: %p\n", h);
1238 #else
1239         UNUSED(rbtdb);
1240 #endif
1241 }
1242
1243 static inline rdatasetheader_t *
1244 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1245 {
1246         rdatasetheader_t *h;
1247
1248         h = isc_mem_get(mctx, sizeof(*h));
1249         if (h == NULL)
1250                 return (NULL);
1251
1252 #if TRACE_HEADER
1253         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1254                 fprintf(stderr, "allocated header: %p\n", h);
1255 #endif
1256         init_rdataset(rbtdb, h);
1257         return (h);
1258 }
1259
1260 static inline void
1261 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1262 {
1263         unsigned int size;
1264         int idx;
1265
1266         if (EXISTS(rdataset) &&
1267             (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1268                 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1269         }
1270
1271         idx = rdataset->node->locknum;
1272         if (ISC_LINK_LINKED(rdataset, link)) {
1273                 INSIST(IS_CACHE(rbtdb));
1274                 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1275         }
1276         if (rdataset->heap_index != 0)
1277                 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1278         rdataset->heap_index = 0;
1279
1280         if (rdataset->noqname != NULL)
1281                 free_noqname(mctx, &rdataset->noqname);
1282         if (rdataset->closest != NULL)
1283                 free_noqname(mctx, &rdataset->closest);
1284
1285         free_acachearray(mctx, rdataset, rdataset->additional_auth);
1286         free_acachearray(mctx, rdataset, rdataset->additional_glue);
1287
1288         if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1289                 size = sizeof(*rdataset);
1290         else
1291                 size = dns_rdataslab_size((unsigned char *)rdataset,
1292                                           sizeof(*rdataset));
1293         isc_mem_put(mctx, rdataset, size);
1294 }
1295
1296 static inline void
1297 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1298         rdatasetheader_t *header, *dcurrent;
1299         isc_boolean_t make_dirty = ISC_FALSE;
1300
1301         /*
1302          * Caller must hold the node lock.
1303          */
1304
1305         /*
1306          * We set the IGNORE attribute on rdatasets with serial number
1307          * 'serial'.  When the reference count goes to zero, these rdatasets
1308          * will be cleaned up; until that time, they will be ignored.
1309          */
1310         for (header = node->data; header != NULL; header = header->next) {
1311                 if (header->serial == serial) {
1312                         header->attributes |= RDATASET_ATTR_IGNORE;
1313                         make_dirty = ISC_TRUE;
1314                 }
1315                 for (dcurrent = header->down;
1316                      dcurrent != NULL;
1317                      dcurrent = dcurrent->down) {
1318                         if (dcurrent->serial == serial) {
1319                                 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1320                                 make_dirty = ISC_TRUE;
1321                         }
1322                 }
1323         }
1324         if (make_dirty)
1325                 node->dirty = 1;
1326 }
1327
1328 static inline void
1329 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1330 {
1331         rdatasetheader_t *d, *down_next;
1332
1333         for (d = top->down; d != NULL; d = down_next) {
1334                 down_next = d->down;
1335                 free_rdataset(rbtdb, mctx, d);
1336         }
1337         top->down = NULL;
1338 }
1339
1340 static inline void
1341 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1342         rdatasetheader_t *current, *top_prev, *top_next;
1343         isc_mem_t *mctx = rbtdb->common.mctx;
1344
1345         /*
1346          * Caller must be holding the node lock.
1347          */
1348
1349         top_prev = NULL;
1350         for (current = node->data; current != NULL; current = top_next) {
1351                 top_next = current->next;
1352                 clean_stale_headers(rbtdb, mctx, current);
1353                 /*
1354                  * If current is nonexistent or stale, we can clean it up.
1355                  */
1356                 if ((current->attributes &
1357                      (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1358                         if (top_prev != NULL)
1359                                 top_prev->next = current->next;
1360                         else
1361                                 node->data = current->next;
1362                         free_rdataset(rbtdb, mctx, current);
1363                 } else
1364                         top_prev = current;
1365         }
1366         node->dirty = 0;
1367 }
1368
1369 static inline void
1370 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1371                 rbtdb_serial_t least_serial)
1372 {
1373         rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1374         rdatasetheader_t *top_prev, *top_next;
1375         isc_mem_t *mctx = rbtdb->common.mctx;
1376         isc_boolean_t still_dirty = ISC_FALSE;
1377
1378         /*
1379          * Caller must be holding the node lock.
1380          */
1381         REQUIRE(least_serial != 0);
1382
1383         top_prev = NULL;
1384         for (current = node->data; current != NULL; current = top_next) {
1385                 top_next = current->next;
1386
1387                 /*
1388                  * First, we clean up any instances of multiple rdatasets
1389                  * with the same serial number, or that have the IGNORE
1390                  * attribute.
1391                  */
1392                 dparent = current;
1393                 for (dcurrent = current->down;
1394                      dcurrent != NULL;
1395                      dcurrent = down_next) {
1396                         down_next = dcurrent->down;
1397                         INSIST(dcurrent->serial <= dparent->serial);
1398                         if (dcurrent->serial == dparent->serial ||
1399                             IGNORE(dcurrent)) {
1400                                 if (down_next != NULL)
1401                                         down_next->next = dparent;
1402                                 dparent->down = down_next;
1403                                 free_rdataset(rbtdb, mctx, dcurrent);
1404                         } else
1405                                 dparent = dcurrent;
1406                 }
1407
1408                 /*
1409                  * We've now eliminated all IGNORE datasets with the possible
1410                  * exception of current, which we now check.
1411                  */
1412                 if (IGNORE(current)) {
1413                         down_next = current->down;
1414                         if (down_next == NULL) {
1415                                 if (top_prev != NULL)
1416                                         top_prev->next = current->next;
1417                                 else
1418                                         node->data = current->next;
1419                                 free_rdataset(rbtdb, mctx, current);
1420                                 /*
1421                                  * current no longer exists, so we can
1422                                  * just continue with the loop.
1423                                  */
1424                                 continue;
1425                         } else {
1426                                 /*
1427                                  * Pull up current->down, making it the new
1428                                  * current.
1429                                  */
1430                                 if (top_prev != NULL)
1431                                         top_prev->next = down_next;
1432                                 else
1433                                         node->data = down_next;
1434                                 down_next->next = top_next;
1435                                 free_rdataset(rbtdb, mctx, current);
1436                                 current = down_next;
1437                         }
1438                 }
1439
1440                 /*
1441                  * We now try to find the first down node less than the
1442                  * least serial.
1443                  */
1444                 dparent = current;
1445                 for (dcurrent = current->down;
1446                      dcurrent != NULL;
1447                      dcurrent = down_next) {
1448                         down_next = dcurrent->down;
1449                         if (dcurrent->serial < least_serial)
1450                                 break;
1451                         dparent = dcurrent;
1452                 }
1453
1454                 /*
1455                  * If there is a such an rdataset, delete it and any older
1456                  * versions.
1457                  */
1458                 if (dcurrent != NULL) {
1459                         do {
1460                                 down_next = dcurrent->down;
1461                                 INSIST(dcurrent->serial <= least_serial);
1462                                 free_rdataset(rbtdb, mctx, dcurrent);
1463                                 dcurrent = down_next;
1464                         } while (dcurrent != NULL);
1465                         dparent->down = NULL;
1466                 }
1467
1468                 /*
1469                  * Note.  The serial number of 'current' might be less than
1470                  * least_serial too, but we cannot delete it because it is
1471                  * the most recent version, unless it is a NONEXISTENT
1472                  * rdataset.
1473                  */
1474                 if (current->down != NULL) {
1475                         still_dirty = ISC_TRUE;
1476                         top_prev = current;
1477                 } else {
1478                         /*
1479                          * If this is a NONEXISTENT rdataset, we can delete it.
1480                          */
1481                         if (NONEXISTENT(current)) {
1482                                 if (top_prev != NULL)
1483                                         top_prev->next = current->next;
1484                                 else
1485                                         node->data = current->next;
1486                                 free_rdataset(rbtdb, mctx, current);
1487                         } else
1488                                 top_prev = current;
1489                 }
1490         }
1491         if (!still_dirty)
1492                 node->dirty = 0;
1493 }
1494
1495 /*%
1496  * Clean up dead nodes.  These are nodes which have no references, and
1497  * have no data.  They are dead but we could not or chose not to delete
1498  * them when we deleted all the data at that node because we did not want
1499  * to wait for the tree write lock.
1500  *
1501  * The caller must hold a tree write lock and bucketnum'th node (write) lock.
1502  */
1503 static void
1504 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1505         dns_rbtnode_t *node;
1506         isc_result_t result;
1507         int count = 10;         /* XXXJT: should be adjustable */
1508
1509         node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1510         while (node != NULL && count > 0) {
1511                 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1512
1513                 /*
1514                  * Since we're holding a tree write lock, it should be
1515                  * impossible for this node to be referenced by others.
1516                  */
1517                 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1518                        node->data == NULL);
1519
1520                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1521                 if (node->nsec3)
1522                         result = dns_rbt_deletenode(rbtdb->nsec3, node,
1523                                                     ISC_FALSE);
1524                 else
1525                         result = dns_rbt_deletenode(rbtdb->tree, node,
1526                                                     ISC_FALSE);
1527                 if (result != ISC_R_SUCCESS)
1528                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1529                                       DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1530                                       "cleanup_dead_nodes: "
1531                                       "dns_rbt_deletenode: %s",
1532                                       isc_result_totext(result));
1533                 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1534                 count--;
1535         }
1536 }
1537
1538 /*
1539  * Caller must be holding the node lock if its reference must be protected
1540  * by the lock.
1541  */
1542 static inline void
1543 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1544         unsigned int lockrefs, noderefs;
1545         isc_refcount_t *lockref;
1546
1547         dns_rbtnode_refincrement0(node, &noderefs);
1548         if (noderefs == 1) {    /* this is the first reference to the node */
1549                 lockref = &rbtdb->node_locks[node->locknum].references;
1550                 isc_refcount_increment0(lockref, &lockrefs);
1551                 INSIST(lockrefs != 0);
1552         }
1553         INSIST(noderefs != 0);
1554 }
1555
1556 /*
1557  * This function is assumed to be called when a node is newly referenced
1558  * and can be in the deadnode list.  In that case the node must be retrieved
1559  * from the list because it is going to be used.  In addition, if the caller
1560  * happens to hold a write lock on the tree, it's a good chance to purge dead
1561  * nodes.
1562  * Note: while a new reference is gained in multiple places, there are only very
1563  * few cases where the node can be in the deadnode list (only empty nodes can
1564  * have been added to the list).
1565  */
1566 static inline void
1567 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1568                 isc_rwlocktype_t treelocktype)
1569 {
1570         isc_boolean_t need_relock = ISC_FALSE;
1571
1572         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1573         new_reference(rbtdb, node);
1574
1575         NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1576                       isc_rwlocktype_read);
1577         if (ISC_LINK_LINKED(node, deadlink))
1578                 need_relock = ISC_TRUE;
1579         else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1580                  treelocktype == isc_rwlocktype_write)
1581                 need_relock = ISC_TRUE;
1582         NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1583                         isc_rwlocktype_read);
1584         if (need_relock) {
1585                 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1586                               isc_rwlocktype_write);
1587                 if (ISC_LINK_LINKED(node, deadlink))
1588                         ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1589                                         node, deadlink);
1590                 if (treelocktype == isc_rwlocktype_write)
1591                         cleanup_dead_nodes(rbtdb, node->locknum);
1592                 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1593                                 isc_rwlocktype_write);
1594         }
1595
1596         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1597 }
1598
1599 /*
1600  * Caller must be holding the node lock; either the "strong", read or write
1601  * lock.  Note that the lock must be held even when node references are
1602  * atomically modified; in that case the decrement operation itself does not
1603  * have to be protected, but we must avoid a race condition where multiple
1604  * threads are decreasing the reference to zero simultaneously and at least
1605  * one of them is going to free the node.
1606  * This function returns ISC_TRUE if and only if the node reference decreases
1607  * to zero.
      *
      * 'least_serial' is passed to clean_zone_node() when a dirty zone node is
      * cleaned; if it is 0, the value is read from rbtdb->least_serial under
      * the database read lock.
      * 'nlock' and 'tlock' are the types of the node lock and of the tree lock
      * held by the caller.  A read node lock is temporarily replaced by a write
      * lock and downgraded again before returning; a tree lock that was
      * upgraded or acquired here is downgraded or released before returning.
      * 'pruning' suppresses the dispatch of a new pruning event; prune_tree()
      * passes ISC_TRUE.
1608  */
1609 static isc_boolean_t
1610 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1611                     rbtdb_serial_t least_serial,
1612                     isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1613                     isc_boolean_t pruning)
1614 {
1615         isc_result_t result;
1616         isc_boolean_t write_locked;
1617         rbtdb_nodelock_t *nodelock;
1618         unsigned int refs, nrefs;
1619         int bucket = node->locknum;
1620         isc_boolean_t no_reference;
1621
1622         nodelock = &rbtdb->node_locks[bucket];
1623
1624         /* Handle easy and typical case first. */
             /*
              * A clean node that still has data or children is never deleted
              * here, so only the counters need to be updated.
              */
1625         if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1626                 dns_rbtnode_refdecrement(node, &nrefs);
1627                 INSIST((int)nrefs >= 0);
1628                 if (nrefs == 0) {
1629                         isc_refcount_decrement(&nodelock->references, &refs);
1630                         INSIST((int)refs >= 0);
1631                 }
1632                 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1633         }
1634
1635         /* Upgrade the lock? */
1636         if (nlock == isc_rwlocktype_read) {
1637                 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1638                 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1639         }
1640         dns_rbtnode_refdecrement(node, &nrefs);
1641         INSIST((int)nrefs >= 0);
1642         if (nrefs > 0) {
1643                 /* Restore the lock? */
1644                 if (nlock == isc_rwlocktype_read)
1645                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1646                 return (ISC_FALSE);
1647         }
1648
             /*
              * The last reference is gone.  If the node is dirty, run the
              * cache or zone cleaning routine on it.
              */
1649         if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1650                 if (IS_CACHE(rbtdb))
1651                         clean_cache_node(rbtdb, node);
1652                 else {
1653                         if (least_serial == 0) {
1654                                 /*
1655                                  * Caller doesn't know the least serial.
1656                                  * Get it.
1657                                  */
1658                                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1659                                 least_serial = rbtdb->least_serial;
1660                                 RBTDB_UNLOCK(&rbtdb->lock,
1661                                              isc_rwlocktype_read);
1662                         }
1663                         clean_zone_node(rbtdb, node, least_serial);
1664                 }
1665         }
1666
             /* The node no longer counts as a reference on its lock bucket. */
1667         isc_refcount_decrement(&nodelock->references, &refs);
1668         INSIST((int)refs >= 0);
1669
1670         /*
1671          * XXXDCL should this only be done for cache zones?
1672          */
             /* A node that still has data or children stays in the tree. */
1673         if (node->data != NULL || node->down != NULL) {
1674                 /* Restore the lock? */
1675                 if (nlock == isc_rwlocktype_read)
1676                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1677                 return (ISC_TRUE);
1678         }
1679
1680         /*
1681          * Attempt to switch to a write lock on the tree.  If this fails,
1682          * we will add this node to a linked list of nodes in this locking
1683          * bucket which we will free later.
1684          */
1685         if (tlock != isc_rwlocktype_write) {
1686                 /*
1687                  * Locking hierarchy notwithstanding, we don't need to free
1688                  * the node lock before acquiring the tree write lock because
1689                  * we only do a trylock.
1690                  */
1691                 if (tlock == isc_rwlocktype_read)
1692                         result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1693                 else
1694                         result = isc_rwlock_trylock(&rbtdb->tree_lock,
1695                                                     isc_rwlocktype_write);
1696                 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1697                               result == ISC_R_LOCKBUSY);
1698
1699                 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1700         } else
1701                 write_locked = ISC_TRUE;
1702
1703         no_reference = ISC_TRUE;
1704         if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1705                 /*
1706                  * We can now delete the node if the reference counter is
1707                  * zero.  This should be typically the case, but a different
1708                  * thread may still gain a (new) reference just before the
1709                  * current thread locks the tree (e.g., in findnode()).
1710                  */
1711
1712                 /*
1713                  * If this node is the only one in the level it's in, deleting
1714                  * this node may recursively make its parent the only node in
1715                  * the parent level; if so, and if no one is currently using
1716                  * the parent node, this is almost the only opportunity to
1717                  * clean it up.  But the recursive cleanup is not that trivial
1718                  * since the child and parent may be in different lock buckets,
1719                  * which would cause a lock order reversal problem.  To avoid
1720                  * the trouble, we'll dispatch a separate event for batch
1721                  * cleaning.  We need to check whether we're deleting the node
1722                  * as a result of pruning to avoid infinite dispatching.
1723                  * Note: pruning happens only when a task has been set for the
1724                  * rbtdb.  If the user of the rbtdb chooses not to set a task,
1725                  * it's their responsibility to purge stale leaves (e.g. by
1726                  * periodic walk-through).
1727                  */
1728                 if (!pruning && node->parent != NULL &&
1729                     node->parent->down == node && node->left == NULL &&
1730                     node->right == NULL && rbtdb->task != NULL) {
1731                         isc_event_t *ev;
1732                         dns_db_t *db;
1733
1734                         ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1735                                                 DNS_EVENT_RBTPRUNE,
1736                                                 prune_tree, node,
1737                                                 sizeof(isc_event_t));
1738                         if (ev != NULL) {
                                     /*
                                      * The pending event holds a reference on
                                      * the node and one on the database, so
                                      * the node is reported as still
                                      * referenced.
                                      */
1739                                 new_reference(rbtdb, node);
1740                                 db = NULL;
1741                                 attach((dns_db_t *)rbtdb, &db);
1742                                 ev->ev_sender = db;
1743                                 isc_task_send(rbtdb->task, &ev);
1744                                 no_reference = ISC_FALSE;
1745                         } else {
1746                                 /*
1747                                  * XXX: this is a weird situation.  We could
1748                                  * ignore this error case, but then the stale
1749                                  * node will unlikely be purged except via a
1750                                  * rare condition such as manual cleanup.  So
1751                                  * we queue it in the deadnodes list, hoping
1752                                  * the memory shortage is temporary and the node
1753                                  * will be deleted later.
1754                                  */
1755                                 isc_log_write(dns_lctx,
1756                                               DNS_LOGCATEGORY_DATABASE,
1757                                               DNS_LOGMODULE_CACHE,
1758                                               ISC_LOG_INFO,
1759                                               "decrement_reference: failed to "
1760                                               "allocate pruning event");
1761                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1762                                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1763                                                 deadlink);
1764                         }
1765                 } else {
                             /* Delete the node from its tree right away. */
1766                         if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1767                                 char printname[DNS_NAME_FORMATSIZE];
1768
1769                                 isc_log_write(dns_lctx,
1770                                               DNS_LOGCATEGORY_DATABASE,
1771                                               DNS_LOGMODULE_CACHE,
1772                                               ISC_LOG_DEBUG(1),
1773                                               "decrement_reference: "
1774                                               "delete from rbt: %p %s",
1775                                               node,
1776                                               dns_rbt_formatnodename(node,
1777                                                         printname,
1778                                                         sizeof(printname)));
1779                         }
1780
1781                         INSIST(!ISC_LINK_LINKED(node, deadlink));
1782                         if (node->nsec3)
1783                                 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1784                                                             ISC_FALSE);
1785                         else
1786                                 result = dns_rbt_deletenode(rbtdb->tree, node,
1787                                                             ISC_FALSE);
1788                         if (result != ISC_R_SUCCESS) {
1789                                 isc_log_write(dns_lctx,
1790                                               DNS_LOGCATEGORY_DATABASE,
1791                                               DNS_LOGMODULE_CACHE,
1792                                               ISC_LOG_WARNING,
1793                                               "decrement_reference: "
1794                                               "dns_rbt_deletenode: %s",
1795                                               isc_result_totext(result));
1796                         }
1797                 }
1798         } else if (dns_rbtnode_refcurrent(node) == 0) {
                     /*
                      * No tree write lock could be obtained: queue the
                      * unreferenced node on the bucket's dead-node list.
                      */
1799                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1800                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1801         } else
                     /* Someone gained a new reference in the meantime. */
1802                 no_reference = ISC_FALSE;
1803
1804         /* Restore the lock? */
1805         if (nlock == isc_rwlocktype_read)
1806                 NODE_WEAKDOWNGRADE(&nodelock->lock);
1807
1808         /*
1809          * Relock a read lock, or unlock the write lock if no lock was held.
1810          */
1811         if (tlock == isc_rwlocktype_none)
1812                 if (write_locked)
1813                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1814
1815         if (tlock == isc_rwlocktype_read)
1816                 if (write_locked)
1817                         isc_rwlock_downgrade(&rbtdb->tree_lock);
1818
1819         return (no_reference);
1820 }
1821
1822 /*
1823  * Prune the tree by recursively cleaning-up single leaves.  In the worst
1824  * case, the number of iteration is the number of tree levels, which is at
1825  * most the maximum number of domain name labels, i.e, 127.  In practice, this
1826  * should be much smaller (only a few times), and even the worst case would be
1827  * acceptable for a single event.
      *
      * This is the action of the DNS_EVENT_RBTPRUNE event sent by
      * decrement_reference(): 'event' carries the database in ev_sender and the
      * node to start from in ev_arg.  The node reference and the database
      * reference taken when the event was sent are both released here.
1828  */
1829 static void
1830 prune_tree(isc_task_t *task, isc_event_t *event) {
1831         dns_rbtdb_t *rbtdb = event->ev_sender;
1832         dns_rbtnode_t *node = event->ev_arg;
1833         dns_rbtnode_t *parent;
1834         unsigned int locknum;
1835
1836         UNUSED(task);
1837
             /* Everything needed has already been copied out of the event. */
1838         isc_event_free(&event);
1839
1840         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1841         locknum = node->locknum;
1842         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1843         do {
                     /*
                      * Fetch the parent first: 'node' may be deleted from the
                      * tree by the call below.
                      */
1844                 parent = node->parent;
1845                 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1846                                     isc_rwlocktype_write, ISC_TRUE);
1847
1848                 if (parent != NULL && parent->down == NULL) {
1849                         /*
1850                          * node was the only down child of the parent and has
1851                          * just been removed.  We'll then need to examine the
1852                          * parent.  Keep the lock if possible; otherwise,
1853                          * release the old lock and acquire one for the parent.
1854                          */
1855                         if (parent->locknum != locknum) {
1856                                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1857                                             isc_rwlocktype_write);
1858                                 locknum = parent->locknum;
1859                                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1860                                           isc_rwlocktype_write);
1861                         }
1862
1863                         /*
1864                          * We need to gain a reference to the node before
1865                          * decrementing it in the next iteration.  In addition,
1866                          * if the node is in the dead-nodes list, extract it
1867                          * from the list beforehand as we do in
1868                          * reactivate_node().
1869                          */
1870                         new_reference(rbtdb, parent);
1871                         if (ISC_LINK_LINKED(parent, deadlink)) {
1872                                 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1873                                                 parent, deadlink);
1874                         }
1875                 } else
                             /* Nothing more to prune: end the loop. */
1876                         parent = NULL;
1877
1878                 node = parent;
1879         } while (node != NULL);
1880         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1881         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1882
             /* Drop the database reference that travelled in ev_sender. */
1883         detach((dns_db_t **)&rbtdb);
1884 }
1885
1886 static inline void
1887 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1888                    rbtdb_changedlist_t *cleanup_list)
1889 {
1890         /*
1891          * Caller must be holding the database lock.
1892          */
1893
1894         rbtdb->least_serial = version->serial;
1895         *cleanup_list = version->changed_list;
1896         ISC_LIST_INIT(version->changed_list);
1897 }
1898
1899 static inline void
1900 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1901         rbtdb_changed_t *changed, *next_changed;
1902
1903         /*
1904          * If the changed record is dirty, then
1905          * an update created multiple versions of
1906          * a given rdataset.  We keep this list
1907          * until we're the least open version, at
1908          * which point it's safe to get rid of any
1909          * older versions.
1910          *
1911          * If the changed record isn't dirty, then
1912          * we don't need it anymore since we're
1913          * committing and not rolling back.
1914          *
1915          * The caller must be holding the database lock.
1916          */
1917         for (changed = HEAD(version->changed_list);
1918              changed != NULL;
1919              changed = next_changed) {
1920                 next_changed = NEXT(changed, link);
1921                 if (!changed->dirty) {
1922                         UNLINK(version->changed_list,
1923                                changed, link);
1924                         APPEND(*cleanup_list,
1925                                changed, link);
1926                 }
1927         }
1928 }
1929
1930 static void
1931 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1932         dns_rdataset_t keyset;
1933         dns_rdataset_t nsecset, signsecset;
1934         dns_rdata_t rdata = DNS_RDATA_INIT;
1935         isc_boolean_t haszonekey = ISC_FALSE;
1936         isc_boolean_t hasnsec = ISC_FALSE;
1937         isc_boolean_t hasoptbit = ISC_FALSE;
1938         isc_boolean_t nsec3createflag = ISC_FALSE;
1939         isc_result_t result;
1940
1941         dns_rdataset_init(&keyset);
1942         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1943                                      0, 0, &keyset, NULL);
1944         if (result == ISC_R_SUCCESS) {
1945                 dns_rdata_t keyrdata = DNS_RDATA_INIT;
1946                 result = dns_rdataset_first(&keyset);
1947                 while (result == ISC_R_SUCCESS) {
1948                         dns_rdataset_current(&keyset, &keyrdata);
1949                         if (dns_zonekey_iszonekey(&keyrdata)) {
1950                                 haszonekey = ISC_TRUE;
1951                                 break;
1952                         }
1953                         result = dns_rdataset_next(&keyset);
1954                 }
1955                 dns_rdataset_disassociate(&keyset);
1956         }
1957         if (!haszonekey) {
1958                 version->secure = dns_db_insecure;
1959                 version->havensec3 = ISC_FALSE;
1960                 return;
1961         }
1962
1963         dns_rdataset_init(&nsecset);
1964         dns_rdataset_init(&signsecset);
1965         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
1966                                      0, 0, &nsecset, &signsecset);
1967         if (result == ISC_R_SUCCESS) {
1968                 if (dns_rdataset_isassociated(&signsecset)) {
1969                         hasnsec = ISC_TRUE;
1970                         result = dns_rdataset_first(&nsecset);
1971                         if (result == ISC_R_SUCCESS) {
1972                                 dns_rdataset_current(&nsecset, &rdata);
1973                                 hasoptbit = dns_nsec_typepresent(&rdata,
1974                                                              dns_rdatatype_opt);
1975                         }
1976                         dns_rdataset_disassociate(&signsecset);
1977                 }
1978                 dns_rdataset_disassociate(&nsecset);
1979         }
1980
1981         setnsec3parameters(db, version, &nsec3createflag);
1982
1983         /*
1984          * Do we have a valid NSEC/NSEC3 chain?
1985          */
1986         if (version->havensec3 || (hasnsec && !hasoptbit))
1987                 version->secure = dns_db_secure;
1988         /*
1989          * Do we have a NSEC/NSEC3 chain under creation?
1990          */
1991         else if (hasoptbit || nsec3createflag)
1992                 version->secure = dns_db_partial;
1993         else
1994                 version->secure = dns_db_insecure;
1995 }
1996
1997 /*%<
1998  * Walk the origin node looking for NSEC3PARAM records.
1999  * Cache the nsec3 parameters.
      *
      * On return version->havensec3 tells whether an acceptable NSEC3PARAM
      * record was found; if so, the hash, salt, salt_length, iterations and
      * flags members of 'version' hold the parameters of the last record
      * accepted.  Unless RFC5155_STRICT is defined, '*nsec3createflag' is set
      * to ISC_TRUE when a record that passed the hash check has
      * DNS_NSEC3FLAG_CREATE set; it is never cleared here.
      *
      * The tree lock and the origin node's lock are held for reading while
      * the node is examined.
2000  */
2001 static void
2002 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
2003                    isc_boolean_t *nsec3createflag)
2004 {
2005         dns_rbtnode_t *node;
2006         dns_rdata_nsec3param_t nsec3param;
2007         dns_rdata_t rdata = DNS_RDATA_INIT;
2008         isc_region_t region;
2009         isc_result_t result;
2010         rdatasetheader_t *header, *header_next;
2011         unsigned char *raw;             /* RDATASLAB */
2012         unsigned int count, length;
2013         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2014
2015         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2016         version->havensec3 = ISC_FALSE;
2017         node = rbtdb->origin_node;
2018         NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2019                   isc_rwlocktype_read);
2020         for (header = node->data;
2021              header != NULL;
2022              header = header_next) {
2023                 header_next = header->next;
                     /*
                      * Follow the 'down' chain to the first header whose serial
                      * is not newer than this version and that is not marked
                      * IGNORE; if that header is NONEXISTENT, there is nothing
                      * to look at for this entry.
                      */
2024                 do {
2025                         if (header->serial <= version->serial &&
2026                             !IGNORE(header)) {
2027                                 if (NONEXISTENT(header))
2028                                         header = NULL;
2029                                 break;
2030                         } else
2031                                 header = header->down;
2032                 } while (header != NULL);
2033
2034                 if (header != NULL &&
2035                     header->type == dns_rdatatype_nsec3param) {
2036                         /*
2037                          * Find A NSEC3PARAM with a supported algorithm.
2038                          */
                             /*
                              * The raw data follows the header and starts with a
                              * two-byte, most-significant-byte-first record
                              * count.  With DNS_RDATASET_FIXED another 4 bytes
                              * per record are skipped here (presumably an offset
                              * table -- confirm against the rdataslab layout).
                              */
2039                         raw = (unsigned char *)header + sizeof(*header);
2040                         count = raw[0] * 256 + raw[1]; /* count */
2041 #if DNS_RDATASET_FIXED
2042                         raw += count * 4 + 2;
2043 #else
2044                         raw += 2;
2045 #endif
2046                         while (count-- > 0U) {
                                     /*
                                      * Each record starts with a two-byte length;
                                      * with DNS_RDATASET_FIXED two further bytes
                                      * precede the record data.
                                      */
2047                                 length = raw[0] * 256 + raw[1];
2048 #if DNS_RDATASET_FIXED
2049                                 raw += 4;
2050 #else
2051                                 raw += 2;
2052 #endif
2053                                 region.base = raw;
2054                                 region.length = length;
                                     /* Advance now: the checks below may 'continue'. */
2055                                 raw += length;
2056                                 dns_rdata_fromregion(&rdata,
2057                                                      rbtdb->common.rdclass,
2058                                                      dns_rdatatype_nsec3param,
2059                                                      &region);
2060                                 result = dns_rdata_tostruct(&rdata,
2061                                                             &nsec3param,
2062                                                             NULL);
2063                                 INSIST(result == ISC_R_SUCCESS);
2064                                 dns_rdata_reset(&rdata);
2065
                                     /*
                                      * Skip hashes that are neither the unknown
                                      * test algorithm nor supported.
                                      */
2066                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2067                                     !dns_nsec3_supportedhash(nsec3param.hash))
2068                                         continue;
2069
2070 #ifdef RFC5155_STRICT
2071                                 if (nsec3param.flags != 0)
2072                                         continue;
2073 #else
2074                                 if ((nsec3param.flags & DNS_NSEC3FLAG_CREATE)
2075                                     != 0)
2076                                         *nsec3createflag = ISC_TRUE;
                                     /* Only the OPTOUT flag may be set in a usable record. */
2077                                 if ((nsec3param.flags & ~DNS_NSEC3FLAG_OPTOUT)
2078                                     != 0)
2079                                         continue;
2080 #endif
2081
2082                                 memcpy(version->salt, nsec3param.salt,
2083                                        nsec3param.salt_length);
2084                                 version->hash = nsec3param.hash;
2085                                 version->salt_length = nsec3param.salt_length;
2086                                 version->iterations = nsec3param.iterations;
2087                                 version->flags = nsec3param.flags;
2088                                 version->havensec3 = ISC_TRUE;
2089                                 /*
2090                                  * Look for a better algorithm than the
2091                                  * unknown test algorithm.
2092                                  */
2093                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2094                                         goto unlock;
2095                         }
2096                 }
2097         }
2098  unlock:
2099         NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2100                     isc_rwlocktype_read);
2101         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2102 }
2103
2104 static void
2105 cleanup_dead_nodes_callback(isc_task_t *task, isc_event_t *event) {
2106         dns_rbtdb_t *rbtdb = event->ev_arg;
2107         isc_boolean_t again = ISC_FALSE;
2108         unsigned int locknum;
2109         unsigned int refs;
2110
2111         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2112         for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) {
2113                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
2114                           isc_rwlocktype_write);
2115                 cleanup_dead_nodes(rbtdb, locknum);
2116                 if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL)
2117                         again = ISC_TRUE;
2118                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
2119                             isc_rwlocktype_write);
2120         }
2121         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2122         if (again)
2123                 isc_task_send(task, &event);
2124         else {
2125                 isc_event_free(&event);
2126                 isc_refcount_decrement(&rbtdb->references, &refs);
2127                 if (refs == 0)
2128                         maybe_free_rbtdb(rbtdb);
2129         }
2130 }
2131
2132 static void
2133 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2134         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2135         rbtdb_version_t *version, *cleanup_version, *least_greater;
2136         isc_boolean_t rollback = ISC_FALSE;
2137         rbtdb_changedlist_t cleanup_list;
2138         rdatasetheaderlist_t resigned_list;
2139         rbtdb_changed_t *changed, *next_changed;
2140         rbtdb_serial_t serial, least_serial;
2141         dns_rbtnode_t *rbtnode;
2142         unsigned int refs;
2143         rdatasetheader_t *header;
2144         isc_boolean_t writer;
2145
2146         REQUIRE(VALID_RBTDB(rbtdb));
2147         version = (rbtdb_version_t *)*versionp;
2148
2149         cleanup_version = NULL;
2150         ISC_LIST_INIT(cleanup_list);
2151         ISC_LIST_INIT(resigned_list);
2152
2153         isc_refcount_decrement(&version->references, &refs);
2154         if (refs > 0) {         /* typical and easy case first */
2155                 if (commit) {
2156                         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2157                         INSIST(!version->writer);
2158                         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2159                 }
2160                 goto end;
2161         }
2162
2163         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2164         serial = version->serial;
2165         writer = version->writer;
2166         if (version->writer) {
2167                 if (commit) {
2168                         unsigned cur_ref;
2169                         rbtdb_version_t *cur_version;
2170
2171                         INSIST(version->commit_ok);
2172                         INSIST(version == rbtdb->future_version);
2173                         /*
2174                          * The current version is going to be replaced.
2175                          * Release the (likely last) reference to it from the
2176                          * DB itself and unlink it from the open list.
2177                          */
2178                         cur_version = rbtdb->current_version;
2179                         isc_refcount_decrement(&cur_version->references,
2180                                                &cur_ref);
2181                         if (cur_ref == 0) {
2182                                 if (cur_version->serial == rbtdb->least_serial)
2183                                         INSIST(EMPTY(cur_version->changed_list));
2184                                 UNLINK(rbtdb->open_versions,
2185                                        cur_version, link);
2186                         }
2187                         if (EMPTY(rbtdb->open_versions)) {
2188                                 /*
2189                                  * We're going to become the least open
2190                                  * version.
2191                                  */
2192                                 make_least_version(rbtdb, version,
2193                                                    &cleanup_list);
2194                         } else {
2195                                 /*
2196                                  * Some other open version is the
2197                                  * least version.  We can't cleanup
2198                                  * records that were changed in this
2199                                  * version because the older versions
2200                                  * may still be in use by an open
2201                                  * version.
2202                                  *
2203                                  * We can, however, discard the
2204                                  * changed records for things that
2205                                  * we've added that didn't exist in
2206                                  * prior versions.
2207                                  */
2208                                 cleanup_nondirty(version, &cleanup_list);
2209                         }
2210                         /*
2211                          * If the (soon to be former) current version
2212                          * isn't being used by anyone, we can clean
2213                          * it up.
2214                          */
2215                         if (cur_ref == 0) {
2216                                 cleanup_version = cur_version;
2217                                 APPENDLIST(version->changed_list,
2218                                            cleanup_version->changed_list,
2219                                            link);
2220                         }
2221                         /*
2222                          * Become the current version.
2223                          */
2224                         version->writer = ISC_FALSE;
2225                         rbtdb->current_version = version;
2226                         rbtdb->current_serial = version->serial;
2227                         rbtdb->future_version = NULL;
2228
2229                         /*
2230                          * Keep the current version in the open list, and
2231                          * gain a reference for the DB itself (see the DB
2232                          * creation function below).  This must be the only
2233                          * case where we need to increment the counter from
2234                          * zero and need to use isc_refcount_increment0().
2235                          */
2236                         isc_refcount_increment0(&version->references,
2237                                                 &cur_ref);
2238                         INSIST(cur_ref == 1);
2239                         PREPEND(rbtdb->open_versions,
2240                                 rbtdb->current_version, link);
2241                         resigned_list = version->resigned_list;
2242                         ISC_LIST_INIT(version->resigned_list);
2243                 } else {
2244                         /*
2245                          * We're rolling back this transaction.
2246                          */
2247                         cleanup_list = version->changed_list;
2248                         ISC_LIST_INIT(version->changed_list);
2249                         resigned_list = version->resigned_list;
2250                         ISC_LIST_INIT(version->resigned_list);
2251                         rollback = ISC_TRUE;
2252                         cleanup_version = version;
2253                         rbtdb->future_version = NULL;
2254                 }
2255         } else {
2256                 if (version != rbtdb->current_version) {
2257                         /*
2258                          * There are no external or internal references
2259                          * to this version and it can be cleaned up.
2260                          */
2261                         cleanup_version = version;
2262
2263                         /*
2264                          * Find the version with the least serial
2265                          * number greater than ours.
2266                          */
2267                         least_greater = PREV(version, link);
2268                         if (least_greater == NULL)
2269                                 least_greater = rbtdb->current_version;
2270
2271                         INSIST(version->serial < least_greater->serial);
2272                         /*
2273                          * Is this the least open version?
2274                          */
2275                         if (version->serial == rbtdb->least_serial) {
2276                                 /*
2277                                  * Yes.  Install the new least open
2278                                  * version.
2279                                  */
2280                                 make_least_version(rbtdb,
2281                                                    least_greater,
2282                                                    &cleanup_list);
2283                         } else {
2284                                 /*
2285                                  * Add any unexecuted cleanups to
2286                                  * those of the least greater version.
2287                                  */
2288                                 APPENDLIST(least_greater->changed_list,
2289                                            version->changed_list,
2290                                            link);
2291                         }
2292                 } else if (version->serial == rbtdb->least_serial)
2293                         INSIST(EMPTY(version->changed_list));
2294                 UNLINK(rbtdb->open_versions, version, link);
2295         }
2296         least_serial = rbtdb->least_serial;
2297         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2298
2299         /*
2300          * Update the zone's secure status.
2301          */
2302         if (writer && commit && !IS_CACHE(rbtdb))
2303                 iszonesecure(db, version, rbtdb->origin_node);
2304
2305         if (cleanup_version != NULL) {
2306                 INSIST(EMPTY(cleanup_version->changed_list));
2307                 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2308                             sizeof(*cleanup_version));
2309         }
2310
2311         /*
2312          * Commit/rollback re-signed headers.
2313          */
2314         for (header = HEAD(resigned_list);
2315              header != NULL;
2316              header = HEAD(resigned_list)) {
2317                 nodelock_t *lock;
2318
2319                 ISC_LIST_UNLINK(resigned_list, header, link);
2320
2321                 lock = &rbtdb->node_locks[header->node->locknum].lock;
2322                 NODE_LOCK(lock, isc_rwlocktype_write);
2323                 if (rollback)
2324                         resign_insert(rbtdb, header->node->locknum, header);
2325                 decrement_reference(rbtdb, header->node, least_serial,
2326                                     isc_rwlocktype_write, isc_rwlocktype_none,
2327                                     ISC_FALSE);
2328                 NODE_UNLOCK(lock, isc_rwlocktype_write);
2329         }
2330
2331         if (!EMPTY(cleanup_list)) {
2332                 isc_event_t *event = NULL;
2333                 isc_rwlocktype_t tlock = isc_rwlocktype_none;
2334
2335                 if (rbtdb->task != NULL)
2336                         event = isc_event_allocate(rbtdb->common.mctx, NULL,
2337                                                    DNS_EVENT_RBTDEADNODES,
2338                                                    cleanup_dead_nodes_callback,
2339                                                    rbtdb, sizeof(isc_event_t));
2340                 if (event == NULL) {
2341                         /*
2342                          * We acquire a tree write lock here in order to make
2343                          * sure that stale nodes will be removed in
2344                          * decrement_reference().  If we didn't have the lock,
2345                          * those nodes could miss the chance to be removed
2346                          * until the server stops.  The write lock is
2347                          * expensive, but this event should be rare enough
2348                          * to justify the cost.
2349                          */
2350                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2351                         tlock = isc_rwlocktype_write;
2352                 }
2353
2354                 for (changed = HEAD(cleanup_list);
2355                      changed != NULL;
2356                      changed = next_changed) {
2357                         nodelock_t *lock;
2358
2359                         next_changed = NEXT(changed, link);
2360                         rbtnode = changed->node;
2361                         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2362
2363                         NODE_LOCK(lock, isc_rwlocktype_write);
2364                         /*
2365                          * This is a good opportunity to purge any dead nodes,
2366                          * so use it.
2367                          */
2368                         if (event == NULL)
2369                                 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2370
2371                         if (rollback)
2372                                 rollback_node(rbtnode, serial);
2373                         decrement_reference(rbtdb, rbtnode, least_serial,
2374                                             isc_rwlocktype_write, tlock,
2375                                             ISC_FALSE);
2376
2377                         NODE_UNLOCK(lock, isc_rwlocktype_write);
2378
2379                         isc_mem_put(rbtdb->common.mctx, changed,
2380                                     sizeof(*changed));
2381                 }
2382                 if (event != NULL) {
2383                         isc_refcount_increment(&rbtdb->references, NULL);
2384                         isc_task_send(rbtdb->task, &event);
2385                 } else
2386                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2387         }
2388
2389  end:
2390         *versionp = NULL;
2391 }
2392
2393 /*
2394  * Add the necessary magic for the wildcard name 'name'
2395  * to be found in 'rbtdb'.
2396  *
2397  * In order for wildcard matching to work correctly in
2398  * zone_find(), we must ensure that a node for the wildcarding
2399  * level exists in the database, and has its 'find_callback'
2400  * and 'wild' bits set.
2401  *
2402  * E.g. if the wildcard name is "*.sub.example." then we
2403  * must ensure that "sub.example." exists and is marked as
2404  * a wildcard level.
2405  */
2406 static isc_result_t
2407 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2408         isc_result_t result;
2409         dns_name_t foundname;
2410         dns_offsets_t offsets;
2411         unsigned int n;
2412         dns_rbtnode_t *node = NULL;
2413
2414         dns_name_init(&foundname, offsets);
2415         n = dns_name_countlabels(name);
2416         INSIST(n >= 2);
2417         n--;
2418         dns_name_getlabelsequence(name, 1, n, &foundname);
2419         result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2420         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2421                 return (result);
2422         node->nsec3 = 0;
2423         node->find_callback = 1;
2424         node->wild = 1;
2425         return (ISC_R_SUCCESS);
2426 }
2427
2428 static isc_result_t
2429 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2430         isc_result_t result;
2431         dns_name_t foundname;
2432         dns_offsets_t offsets;
2433         unsigned int n, l, i;
2434
2435         dns_name_init(&foundname, offsets);
2436         n = dns_name_countlabels(name);
2437         l = dns_name_countlabels(&rbtdb->common.origin);
2438         i = l + 1;
2439         while (i < n) {
2440                 dns_rbtnode_t *node = NULL;     /* dummy */
2441                 dns_name_getlabelsequence(name, n - i, i, &foundname);
2442                 if (dns_name_iswildcard(&foundname)) {
2443                         result = add_wildcard_magic(rbtdb, &foundname);
2444                         if (result != ISC_R_SUCCESS)
2445                                 return (result);
2446                         result = dns_rbt_addnode(rbtdb->tree, &foundname,
2447                                                  &node);
2448                         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2449                                 return (result);
2450                         node->nsec3 = 0;
2451                 }
2452                 i++;
2453         }
2454         return (ISC_R_SUCCESS);
2455 }
2456
2457 static isc_result_t
2458 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2459          dns_dbnode_t **nodep)
2460 {
2461         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2462         dns_rbtnode_t *node = NULL;
2463         dns_name_t nodename;
2464         isc_result_t result;
2465         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2466
2467         REQUIRE(VALID_RBTDB(rbtdb));
2468
2469         dns_name_init(&nodename, NULL);
2470         RWLOCK(&rbtdb->tree_lock, locktype);
2471         result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2472                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2473         if (result != ISC_R_SUCCESS) {
2474                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2475                 if (!create) {
2476                         if (result == DNS_R_PARTIALMATCH)
2477                                 result = ISC_R_NOTFOUND;
2478                         return (result);
2479                 }
2480                 /*
2481                  * It would be nice to try to upgrade the lock instead of
2482                  * unlocking then relocking.
2483                  */
2484                 locktype = isc_rwlocktype_write;
2485                 RWLOCK(&rbtdb->tree_lock, locktype);
2486                 node = NULL;
2487                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2488                 if (result == ISC_R_SUCCESS) {
2489                         dns_rbt_namefromnode(node, &nodename);
2490 #ifdef DNS_RBT_USEHASH
2491                         node->locknum = node->hashval % rbtdb->node_lock_count;
2492 #else
2493                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2494                                 rbtdb->node_lock_count;
2495 #endif
2496                         node->nsec3 = 0;
2497                         add_empty_wildcards(rbtdb, name);
2498
2499                         if (dns_name_iswildcard(name)) {
2500                                 result = add_wildcard_magic(rbtdb, name);
2501                                 if (result != ISC_R_SUCCESS) {
2502                                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2503                                         return (result);
2504                                 }
2505                         }
2506                 } else if (result != ISC_R_EXISTS) {
2507                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2508                         return (result);
2509                 }
2510         }
2511         reactivate_node(rbtdb, node, locktype);
2512         RWUNLOCK(&rbtdb->tree_lock, locktype);
2513
2514         *nodep = (dns_dbnode_t *)node;
2515
2516         return (ISC_R_SUCCESS);
2517 }
2518
2519 static isc_result_t
2520 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2521               dns_dbnode_t **nodep)
2522 {
2523         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2524         dns_rbtnode_t *node = NULL;
2525         dns_name_t nodename;
2526         isc_result_t result;
2527         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2528
2529         REQUIRE(VALID_RBTDB(rbtdb));
2530
2531         dns_name_init(&nodename, NULL);
2532         RWLOCK(&rbtdb->tree_lock, locktype);
2533         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, &node, NULL,
2534                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2535         if (result != ISC_R_SUCCESS) {
2536                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2537                 if (!create) {
2538                         if (result == DNS_R_PARTIALMATCH)
2539                                 result = ISC_R_NOTFOUND;
2540                         return (result);
2541                 }
2542                 /*
2543                  * It would be nice to try to upgrade the lock instead of
2544                  * unlocking then relocking.
2545                  */
2546                 locktype = isc_rwlocktype_write;
2547                 RWLOCK(&rbtdb->tree_lock, locktype);
2548                 node = NULL;
2549                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
2550                 if (result == ISC_R_SUCCESS) {
2551                         dns_rbt_namefromnode(node, &nodename);
2552 #ifdef DNS_RBT_USEHASH
2553                         node->locknum = node->hashval % rbtdb->node_lock_count;
2554 #else
2555                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2556                                 rbtdb->node_lock_count;
2557 #endif
2558                         node->nsec3 = 1U;
2559                 } else if (result != ISC_R_EXISTS) {
2560                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2561                         return (result);
2562                 }
2563         } else
2564                 INSIST(node->nsec3);
2565         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
2566         new_reference(rbtdb, node);
2567         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
2568         RWUNLOCK(&rbtdb->tree_lock, locktype);
2569
2570         *nodep = (dns_dbnode_t *)node;
2571
2572         return (ISC_R_SUCCESS);
2573 }
2574
2575 static isc_result_t
2576 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2577         rbtdb_search_t *search = arg;
2578         rdatasetheader_t *header, *header_next;
2579         rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2580         rdatasetheader_t *found;
2581         isc_result_t result;
2582         dns_rbtnode_t *onode;
2583
2584         /*
2585          * We only want to remember the topmost zone cut, since it's the one
2586          * that counts, so we'll just continue if we've already found a
2587          * zonecut.
2588          */
2589         if (search->zonecut != NULL)
2590                 return (DNS_R_CONTINUE);
2591
2592         found = NULL;
2593         result = DNS_R_CONTINUE;
2594         onode = search->rbtdb->origin_node;
2595
2596         NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2597                   isc_rwlocktype_read);
2598
2599         /*
2600          * Look for an NS or DNAME rdataset active in our version.
2601          */
2602         ns_header = NULL;
2603         dname_header = NULL;
2604         sigdname_header = NULL;
2605         for (header = node->data; header != NULL; header = header_next) {
2606                 header_next = header->next;
2607                 if (header->type == dns_rdatatype_ns ||
2608                     header->type == dns_rdatatype_dname ||
2609                     header->type == RBTDB_RDATATYPE_SIGDNAME) {
2610                         do {
2611                                 if (header->serial <= search->serial &&
2612                                     !IGNORE(header)) {
2613                                         /*
2614                                          * Is this a "this rdataset doesn't
2615                                          * exist" record?
2616                                          */
2617                                         if (NONEXISTENT(header))
2618                                                 header = NULL;
2619                                         break;
2620                                 } else
2621                                         header = header->down;
2622                         } while (header != NULL);
2623                         if (header != NULL) {
2624                                 if (header->type == dns_rdatatype_dname)
2625                                         dname_header = header;
2626                                 else if (header->type ==
2627                                            RBTDB_RDATATYPE_SIGDNAME)
2628                                         sigdname_header = header;
2629                                 else if (node != onode ||
2630                                          IS_STUB(search->rbtdb)) {
2631                                         /*
2632                                          * We've found an NS rdataset that
2633                                          * isn't at the origin node.  We check
2634                                          * that they're not at the origin node,
2635                                          * because otherwise we'd erroneously
2636                                          * treat the zone top as if it were
2637                                          * a delegation.
2638                                          */
2639                                         ns_header = header;
2640                                 }
2641                         }
2642                 }
2643         }
2644
2645         /*
2646          * Did we find anything?
2647          */
2648         if (!IS_CACHE(search->rbtdb) && !IS_STUB(search->rbtdb) &&
2649             ns_header != NULL) {
2650                 /*
2651                  * Note that NS has precedence over DNAME if both exist
2652                  * in a zone.  Otherwise DNAME take precedence over NS.
2653                  */
2654                 found = ns_header;
2655                 search->zonecut_sigrdataset = NULL;
2656         } else if (dname_header != NULL) {
2657                 found = dname_header;
2658                 search->zonecut_sigrdataset = sigdname_header;
2659         } else if (ns_header != NULL) {
2660                 found = ns_header;
2661                 search->zonecut_sigrdataset = NULL;
2662         }
2663
2664         if (found != NULL) {
2665                 /*
2666                  * We increment the reference count on node to ensure that
2667                  * search->zonecut_rdataset will still be valid later.
2668                  */
2669                 new_reference(search->rbtdb, node);
2670                 search->zonecut = node;
2671                 search->zonecut_rdataset = found;
2672                 search->need_cleanup = ISC_TRUE;
2673                 /*
2674                  * Since we've found a zonecut, anything beneath it is
2675                  * glue and is not subject to wildcard matching, so we
2676                  * may clear search->wild.
2677                  */
2678                 search->wild = ISC_FALSE;
2679                 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2680                         /*
2681                          * If the caller does not want to find glue, then
2682                          * this is the best answer and the search should
2683                          * stop now.
2684                          */
2685                         result = DNS_R_PARTIALMATCH;
2686                 } else {
2687                         dns_name_t *zcname;
2688
2689                         /*
2690                          * The search will continue beneath the zone cut.
2691                          * This may or may not be the best match.  In case it
2692                          * is, we need to remember the node name.
2693                          */
2694                         zcname = dns_fixedname_name(&search->zonecut_name);
2695                         RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2696                                       ISC_R_SUCCESS);
2697                         search->copy_name = ISC_TRUE;
2698                 }
2699         } else {
2700                 /*
2701                  * There is no zonecut at this node which is active in this
2702                  * version.
2703                  *
2704                  * If this is a "wild" node and the caller hasn't disabled
2705                  * wildcard matching, remember that we've seen a wild node
2706                  * in case we need to go searching for wildcard matches
2707                  * later on.
2708                  */
2709                 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2710                         search->wild = ISC_TRUE;
2711         }
2712
2713         NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2714                     isc_rwlocktype_read);
2715
2716         return (result);
2717 }
2718
2719 static inline void
2720 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2721               rdatasetheader_t *header, isc_stdtime_t now,
2722               dns_rdataset_t *rdataset)
2723 {
2724         unsigned char *raw;     /* RDATASLAB */
2725
2726         /*
2727          * Caller must be holding the node reader lock.
2728          * XXXJT: technically, we need a writer lock, since we'll increment
2729          * the header count below.  However, since the actual counter value
2730          * doesn't matter, we prioritize performance here.  (We may want to
2731          * use atomic increment when available).
2732          */
2733
2734         if (rdataset == NULL)
2735                 return;
2736
2737         new_reference(rbtdb, node);
2738
2739         INSIST(rdataset->methods == NULL);      /* We must be disassociated. */
2740
2741         rdataset->methods = &rdataset_methods;
2742         rdataset->rdclass = rbtdb->common.rdclass;
2743         rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2744         rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2745         rdataset->ttl = header->rdh_ttl - now;
2746         rdataset->trust = header->trust;
2747         if (NEGATIVE(header))
2748                 rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE;
2749         if (NXDOMAIN(header))
2750                 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2751         if (OPTOUT(header))
2752                 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2753         rdataset->private1 = rbtdb;
2754         rdataset->private2 = node;
2755         raw = (unsigned char *)header + sizeof(*header);
2756         rdataset->private3 = raw;
2757         rdataset->count = header->count++;
2758         if (rdataset->count == ISC_UINT32_MAX)
2759                 rdataset->count = 0;
2760
2761         /*
2762          * Reset iterator state.
2763          */
2764         rdataset->privateuint4 = 0;
2765         rdataset->private5 = NULL;
2766
2767         /*
2768          * Add noqname proof.
2769          */
2770         rdataset->private6 = header->noqname;
2771         if (rdataset->private6 != NULL)
2772                 rdataset->attributes |=  DNS_RDATASETATTR_NOQNAME;
2773         rdataset->private7 = header->closest;
2774         if (rdataset->private7 != NULL)
2775                 rdataset->attributes |=  DNS_RDATASETATTR_CLOSEST;
2776
2777         /*
2778          * Copy out re-signing information.
2779          */
2780         if (RESIGN(header)) {
2781                 rdataset->attributes |=  DNS_RDATASETATTR_RESIGN;
2782                 rdataset->resign = header->resign;
2783         } else
2784                 rdataset->resign = 0;
2785 }
2786
2787 static inline isc_result_t
2788 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2789                  dns_name_t *foundname, dns_rdataset_t *rdataset,
2790                  dns_rdataset_t *sigrdataset)
2791 {
2792         isc_result_t result;
2793         dns_name_t *zcname;
2794         rbtdb_rdatatype_t type;
2795         dns_rbtnode_t *node;
2796
2797         /*
2798          * The caller MUST NOT be holding any node locks.
2799          */
2800
2801         node = search->zonecut;
2802         type = search->zonecut_rdataset->type;
2803
2804         /*
2805          * If we have to set foundname, we do it before anything else.
2806          * If we were to set foundname after we had set nodep or bound the
2807          * rdataset, then we'd have to undo that work if dns_name_copy()
2808          * failed.  By setting foundname first, there's nothing to undo if
2809          * we have trouble.
2810          */
2811         if (foundname != NULL && search->copy_name) {
2812                 zcname = dns_fixedname_name(&search->zonecut_name);
2813                 result = dns_name_copy(zcname, foundname, NULL);
2814                 if (result != ISC_R_SUCCESS)
2815                         return (result);
2816         }
2817         if (nodep != NULL) {
2818                 /*
2819                  * Note that we don't have to increment the node's reference
2820                  * count here because we're going to use the reference we
2821                  * already have in the search block.
2822                  */
2823                 *nodep = node;
2824                 search->need_cleanup = ISC_FALSE;
2825         }
2826         if (rdataset != NULL) {
2827                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2828                           isc_rwlocktype_read);
2829                 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2830                               search->now, rdataset);
2831                 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2832                         bind_rdataset(search->rbtdb, node,
2833                                       search->zonecut_sigrdataset,
2834                                       search->now, sigrdataset);
2835                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2836                             isc_rwlocktype_read);
2837         }
2838
2839         if (type == dns_rdatatype_dname)
2840                 return (DNS_R_DNAME);
2841         return (DNS_R_DELEGATION);
2842 }
2843
2844 static inline isc_boolean_t
2845 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2846            dns_rbtnode_t *node)
2847 {
2848         unsigned char *raw;     /* RDATASLAB */
2849         unsigned int count, size;
2850         dns_name_t ns_name;
2851         isc_boolean_t valid = ISC_FALSE;
2852         dns_offsets_t offsets;
2853         isc_region_t region;
2854         rdatasetheader_t *header;
2855
2856         /*
2857          * No additional locking is required.
2858          */
2859
2860         /*
2861          * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
2862          * if it occurs at a zone cut, but is not valid below it.
2863          */
2864         if (type == dns_rdatatype_ns) {
2865                 if (node != search->zonecut) {
2866                         return (ISC_FALSE);
2867                 }
2868         } else if (type != dns_rdatatype_a &&
2869                    type != dns_rdatatype_aaaa &&
2870                    type != dns_rdatatype_a6) {
2871                 return (ISC_FALSE);
2872         }
2873
2874         header = search->zonecut_rdataset;
2875         raw = (unsigned char *)header + sizeof(*header);
2876         count = raw[0] * 256 + raw[1];
2877 #if DNS_RDATASET_FIXED
2878         raw += 2 + (4 * count);
2879 #else
2880         raw += 2;
2881 #endif
2882
2883         while (count > 0) {
2884                 count--;
2885                 size = raw[0] * 256 + raw[1];
2886 #if DNS_RDATASET_FIXED
2887                 raw += 4;
2888 #else
2889                 raw += 2;
2890 #endif
2891                 region.base = raw;
2892                 region.length = size;
2893                 raw += size;
2894                 /*
2895                  * XXX Until we have rdata structures, we have no choice but
2896                  * to directly access the rdata format.
2897                  */
2898                 dns_name_init(&ns_name, offsets);
2899                 dns_name_fromregion(&ns_name, &region);
2900                 if (dns_name_compare(&ns_name, name) == 0) {
2901                         valid = ISC_TRUE;
2902                         break;
2903                 }
2904         }
2905
2906         return (valid);
2907 }
2908
2909 static inline isc_boolean_t
2910 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2911             dns_name_t *name)
2912 {
2913         dns_fixedname_t fnext;
2914         dns_fixedname_t forigin;
2915         dns_name_t *next;
2916         dns_name_t *origin;
2917         dns_name_t prefix;
2918         dns_rbtdb_t *rbtdb;
2919         dns_rbtnode_t *node;
2920         isc_result_t result;
2921         isc_boolean_t answer = ISC_FALSE;
2922         rdatasetheader_t *header;
2923
2924         rbtdb = search->rbtdb;
2925
2926         dns_name_init(&prefix, NULL);
2927         dns_fixedname_init(&fnext);
2928         next = dns_fixedname_name(&fnext);
2929         dns_fixedname_init(&forigin);
2930         origin = dns_fixedname_name(&forigin);
2931
2932         result = dns_rbtnodechain_next(chain, NULL, NULL);
2933         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2934                 node = NULL;
2935                 result = dns_rbtnodechain_current(chain, &prefix,
2936                                                   origin, &node);
2937                 if (result != ISC_R_SUCCESS)
2938                         break;
2939                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2940                           isc_rwlocktype_read);
2941                 for (header = node->data;
2942                      header != NULL;
2943                      header = header->next) {
2944                         if (header->serial <= search->serial &&
2945                             !IGNORE(header) && EXISTS(header))
2946                                 break;
2947                 }
2948                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2949                             isc_rwlocktype_read);
2950                 if (header != NULL)
2951                         break;
2952                 result = dns_rbtnodechain_next(chain, NULL, NULL);
2953         }
2954         if (result == ISC_R_SUCCESS)
2955                 result = dns_name_concatenate(&prefix, origin, next, NULL);
2956         if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
2957                 answer = ISC_TRUE;
2958         return (answer);
2959 }
2960
2961 static inline isc_boolean_t
2962 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2963         dns_fixedname_t fnext;
2964         dns_fixedname_t forigin;
2965         dns_fixedname_t fprev;
2966         dns_name_t *next;
2967         dns_name_t *origin;
2968         dns_name_t *prev;
2969         dns_name_t name;
2970         dns_name_t rname;
2971         dns_name_t tname;
2972         dns_rbtdb_t *rbtdb;
2973         dns_rbtnode_t *node;
2974         dns_rbtnodechain_t chain;
2975         isc_boolean_t check_next = ISC_TRUE;
2976         isc_boolean_t check_prev = ISC_TRUE;
2977         isc_boolean_t answer = ISC_FALSE;
2978         isc_result_t result;
2979         rdatasetheader_t *header;
2980         unsigned int n;
2981
2982         rbtdb = search->rbtdb;
2983
2984         dns_name_init(&name, NULL);
2985         dns_name_init(&tname, NULL);
2986         dns_name_init(&rname, NULL);
2987         dns_fixedname_init(&fnext);
2988         next = dns_fixedname_name(&fnext);
2989         dns_fixedname_init(&fprev);
2990         prev = dns_fixedname_name(&fprev);
2991         dns_fixedname_init(&forigin);
2992         origin = dns_fixedname_name(&forigin);
2993
2994         /*
2995          * Find if qname is at or below a empty node.
2996          * Use our own copy of the chain.
2997          */
2998
2999         chain = search->chain;
3000         do {
3001                 node = NULL;
3002                 result = dns_rbtnodechain_current(&chain, &name,
3003                                                   origin, &node);
3004                 if (result != ISC_R_SUCCESS)
3005                         break;
3006                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3007                           isc_rwlocktype_read);
3008                 for (header = node->data;
3009                      header != NULL;
3010                      header = header->next) {
3011                         if (header->serial <= search->serial &&
3012                             !IGNORE(header) && EXISTS(header))
3013                                 break;
3014                 }
3015                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3016                             isc_rwlocktype_read);
3017                 if (header != NULL)
3018                         break;
3019                 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
3020         } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
3021         if (result == ISC_R_SUCCESS)
3022                 result = dns_name_concatenate(&name, origin, prev, NULL);
3023         if (result != ISC_R_SUCCESS)
3024                 check_prev = ISC_FALSE;
3025
3026         result = dns_rbtnodechain_next(&chain, NULL, NULL);
3027         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3028                 node = NULL;
3029                 result = dns_rbtnodechain_current(&chain, &name,
3030                                                   origin, &node);
3031                 if (result != ISC_R_SUCCESS)
3032                         break;
3033                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3034                           isc_rwlocktype_read);
3035                 for (header = node->data;
3036                      header != NULL;
3037                      header = header->next) {
3038                         if (header->serial <= search->serial &&
3039                             !IGNORE(header) && EXISTS(header))
3040                                 break;
3041                 }
3042                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3043                             isc_rwlocktype_read);
3044                 if (header != NULL)
3045                         break;
3046                 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3047         }
3048         if (result == ISC_R_SUCCESS)
3049                 result = dns_name_concatenate(&name, origin, next, NULL);
3050         if (result != ISC_R_SUCCESS)
3051                 check_next = ISC_FALSE;
3052
3053         dns_name_clone(qname, &rname);
3054
3055         /*
3056          * Remove the wildcard label to find the terminal name.
3057          */
3058         n = dns_name_countlabels(wname);
3059         dns_name_getlabelsequence(wname, 1, n - 1, &tname);
3060
3061         do {
3062                 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3063                     (check_next && dns_name_issubdomain(next, &rname))) {
3064                         answer = ISC_TRUE;
3065                         break;
3066                 }
3067                 /*
3068                  * Remove the left hand label.
3069                  */
3070                 n = dns_name_countlabels(&rname);
3071                 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3072         } while (!dns_name_equal(&rname, &tname));
3073         return (answer);
3074 }
3075
3076 static inline isc_result_t
3077 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3078               dns_name_t *qname)
3079 {
3080         unsigned int i, j;
3081         dns_rbtnode_t *node, *level_node, *wnode;
3082         rdatasetheader_t *header;
3083         isc_result_t result = ISC_R_NOTFOUND;
3084         dns_name_t name;
3085         dns_name_t *wname;
3086         dns_fixedname_t fwname;
3087         dns_rbtdb_t *rbtdb;
3088         isc_boolean_t done, wild, active;
3089         dns_rbtnodechain_t wchain;
3090
3091         /*
3092          * Caller must be holding the tree lock and MUST NOT be holding
3093          * any node locks.
3094          */
3095
3096         /*
3097          * Examine each ancestor level.  If the level's wild bit
3098          * is set, then construct the corresponding wildcard name and
3099          * search for it.  If the wildcard node exists, and is active in
3100          * this version, we're done.  If not, then we next check to see
3101          * if the ancestor is active in this version.  If so, then there
3102          * can be no possible wildcard match and again we're done.  If not,
3103          * continue the search.
3104          */
3105
3106         rbtdb = search->rbtdb;
3107         i = search->chain.level_matches;
3108         done = ISC_FALSE;
3109         node = *nodep;
3110         do {
3111                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3112                           isc_rwlocktype_read);
3113
3114                 /*
3115                  * First we try to figure out if this node is active in
3116                  * the search's version.  We do this now, even though we
3117                  * may not need the information, because it simplifies the
3118                  * locking and code flow.
3119                  */
3120                 for (header = node->data;
3121                      header != NULL;
3122                      header = header->next) {
3123                         if (header->serial <= search->serial &&
3124                             !IGNORE(header) && EXISTS(header))
3125                                 break;
3126                 }
3127                 if (header != NULL)
3128                         active = ISC_TRUE;
3129                 else
3130                         active = ISC_FALSE;
3131
3132                 if (node->wild)
3133                         wild = ISC_TRUE;
3134                 else
3135                         wild = ISC_FALSE;
3136
3137                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3138                             isc_rwlocktype_read);
3139
3140                 if (wild) {
3141                         /*
3142                          * Construct the wildcard name for this level.
3143                          */
3144                         dns_name_init(&name, NULL);
3145                         dns_rbt_namefromnode(node, &name);
3146                         dns_fixedname_init(&fwname);
3147                         wname = dns_fixedname_name(&fwname);
3148                         result = dns_name_concatenate(dns_wildcardname, &name,
3149                                                       wname, NULL);
3150                         j = i;
3151                         while (result == ISC_R_SUCCESS && j != 0) {
3152                                 j--;
3153                                 level_node = search->chain.levels[j];
3154                                 dns_name_init(&name, NULL);
3155                                 dns_rbt_namefromnode(level_node, &name);
3156                                 result = dns_name_concatenate(wname,
3157                                                               &name,
3158                                                               wname,
3159                                                               NULL);
3160                         }
3161                         if (result != ISC_R_SUCCESS)
3162                                 break;
3163
3164                         wnode = NULL;
3165                         dns_rbtnodechain_init(&wchain, NULL);
3166                         result = dns_rbt_findnode(rbtdb->tree, wname,
3167                                                   NULL, &wnode, &wchain,
3168                                                   DNS_RBTFIND_EMPTYDATA,
3169                                                   NULL, NULL);
3170                         if (result == ISC_R_SUCCESS) {
3171                                 nodelock_t *lock;
3172
3173                                 /*
3174                                  * We have found the wildcard node.  If it
3175                                  * is active in the search's version, we're
3176                                  * done.
3177                                  */
3178                                 lock = &rbtdb->node_locks[wnode->locknum].lock;
3179                                 NODE_LOCK(lock, isc_rwlocktype_read);
3180                                 for (header = wnode->data;
3181                                      header != NULL;
3182                                      header = header->next) {
3183                                         if (header->serial <= search->serial &&
3184                                             !IGNORE(header) && EXISTS(header))
3185                                                 break;
3186                                 }
3187                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3188                                 if (header != NULL ||
3189                                     activeempty(search, &wchain, wname)) {
3190                                         if (activeemtpynode(search, qname,
3191                                                             wname)) {
3192                                                 return (ISC_R_NOTFOUND);
3193                                         }
3194                                         /*
3195                                          * The wildcard node is active!
3196                                          *
3197                                          * Note: result is still ISC_R_SUCCESS
3198                                          * so we don't have to set it.
3199                                          */
3200                                         *nodep = wnode;
3201                                         break;
3202                                 }
3203                         } else if (result != ISC_R_NOTFOUND &&
3204                                    result != DNS_R_PARTIALMATCH) {
3205                                 /*
3206                                  * An error has occurred.  Bail out.
3207                                  */
3208                                 break;
3209                         }
3210                 }
3211
3212                 if (active) {
3213                         /*
3214                          * The level node is active.  Any wildcarding
3215                          * present at higher levels has no
3216                          * effect and we're done.
3217                          */
3218                         result = ISC_R_NOTFOUND;
3219                         break;
3220                 }
3221
3222                 if (i > 0) {
3223                         i--;
3224                         node = search->chain.levels[i];
3225                 } else
3226                         done = ISC_TRUE;
3227         } while (!done);
3228
3229         return (result);
3230 }
3231
3232 static isc_boolean_t
3233 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3234 {
3235         dns_rdata_t rdata = DNS_RDATA_INIT;
3236         dns_rdata_nsec3_t nsec3;
3237         unsigned char *raw;                     /* RDATASLAB */
3238         unsigned int rdlen, count;
3239         isc_region_t region;
3240         isc_result_t result;
3241
3242         REQUIRE(header->type == dns_rdatatype_nsec3);
3243
3244         raw = (unsigned char *)header + sizeof(*header);
3245         count = raw[0] * 256 + raw[1]; /* count */
3246 #if DNS_RDATASET_FIXED
3247         raw += count * 4 + 2;
3248 #else
3249         raw += 2;
3250 #endif
3251         while (count-- > 0) {
3252                 rdlen = raw[0] * 256 + raw[1];
3253 #if DNS_RDATASET_FIXED
3254                 raw += 4;
3255 #else
3256                 raw += 2;
3257 #endif
3258                 region.base = raw;
3259                 region.length = rdlen;
3260                 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3261                                      dns_rdatatype_nsec3, &region);
3262                 raw += rdlen;
3263                 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3264                 INSIST(result == ISC_R_SUCCESS);
3265                 if (nsec3.hash == search->rbtversion->hash &&
3266                     nsec3.iterations == search->rbtversion->iterations &&
3267                     nsec3.salt_length == search->rbtversion->salt_length &&
3268                     memcmp(nsec3.salt, search->rbtversion->salt,
3269                            nsec3.salt_length) == 0)
3270                         return (ISC_TRUE);
3271                 dns_rdata_reset(&rdata);
3272         }
3273         return (ISC_FALSE);
3274 }
3275
3276 /*
3277  * Find node of the NSEC/NSEC3 record that is 'name'.
3278  */
3279 static inline isc_result_t
3280 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3281                   dns_name_t *foundname, dns_rdataset_t *rdataset,
3282                   dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3283                   dns_db_secure_t secure)
3284 {
3285         dns_rbtnode_t *node;
3286         rdatasetheader_t *header, *header_next, *found, *foundsig;
3287         isc_boolean_t empty_node;
3288         isc_result_t result;
3289         dns_fixedname_t fname, forigin;
3290         dns_name_t *name, *origin;
3291         dns_rdatatype_t type;
3292         rbtdb_rdatatype_t sigtype;
3293         isc_boolean_t wraps;
3294         isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3295
3296         if (tree == search->rbtdb->nsec3) {
3297                 type = dns_rdatatype_nsec3;
3298                 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3299                 wraps = ISC_TRUE;
3300         } else {
3301                 type = dns_rdatatype_nsec;
3302                 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3303                 wraps = ISC_FALSE;
3304         }
3305
3306  again:
3307         do {
3308                 node = NULL;
3309                 dns_fixedname_init(&fname);
3310                 name = dns_fixedname_name(&fname);
3311                 dns_fixedname_init(&forigin);
3312                 origin = dns_fixedname_name(&forigin);
3313                 result = dns_rbtnodechain_current(&search->chain, name,
3314                                                   origin, &node);
3315                 if (result != ISC_R_SUCCESS)
3316                         return (result);
3317                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3318                           isc_rwlocktype_read);
3319                 found = NULL;
3320                 foundsig = NULL;
3321                 empty_node = ISC_TRUE;
3322                 for (header = node->data;
3323                      header != NULL;
3324                      header = header_next) {
3325                         header_next = header->next;
3326                         /*
3327                          * Look for an active, extant NSEC or RRSIG NSEC.
3328                          */
3329                         do {
3330                                 if (header->serial <= search->serial &&
3331                                     !IGNORE(header)) {
3332                                         /*
3333                                          * Is this a "this rdataset doesn't
3334                                          * exist" record?
3335                                          */
3336                                         if (NONEXISTENT(header))
3337                                                 header = NULL;
3338                                         break;
3339                                 } else
3340                                         header = header->down;
3341                         } while (header != NULL);
3342                         if (header != NULL) {
3343                                 /*
3344                                  * We now know that there is at least one
3345                                  * active rdataset at this node.
3346                                  */
3347                                 empty_node = ISC_FALSE;
3348                                 if (header->type == type) {
3349                                         found = header;
3350                                         if (foundsig != NULL)
3351                                                 break;
3352                                 } else if (header->type == sigtype) {
3353                                         foundsig = header;
3354                                         if (found != NULL)
3355                                                 break;
3356                                 }
3357                         }
3358                 }
3359                 if (!empty_node) {
3360                         if (found != NULL && search->rbtversion->havensec3 &&
3361                             found->type == dns_rdatatype_nsec3 &&
3362                             !matchparams(found, search)) {
3363                                 empty_node = ISC_TRUE;
3364                                 found = NULL;
3365                                 foundsig = NULL;
3366                                 result = dns_rbtnodechain_prev(&search->chain,
3367                                                                NULL, NULL);
3368                         } else if (found != NULL &&
3369                                    (foundsig != NULL || !need_sig))
3370                         {
3371                                 /*
3372                                  * We've found the right NSEC/NSEC3 record.
3373                                  *
3374                                  * Note: for this to really be the right
3375                                  * NSEC record, it's essential that the NSEC
3376                                  * records of any nodes obscured by a zone
3377                                  * cut have been removed; we assume this is
3378                                  * the case.
3379                                  */
3380                                 result = dns_name_concatenate(name, origin,
3381                                                               foundname, NULL);
3382                                 if (result == ISC_R_SUCCESS) {
3383                                         if (nodep != NULL) {
3384                                                 new_reference(search->rbtdb,
3385                                                               node);
3386                                                 *nodep = node;
3387                                         }
3388                                         bind_rdataset(search->rbtdb, node,
3389                                                       found, search->now,
3390                                                       rdataset);
3391                                         if (foundsig != NULL)
3392                                                 bind_rdataset(search->rbtdb,
3393                                                               node,
3394                                                               foundsig,
3395                                                               search->now,
3396                                                               sigrdataset);
3397                                 }
3398                         } else if (found == NULL && foundsig == NULL) {
3399                                 /*
3400                                  * This node is active, but has no NSEC or
3401                                  * RRSIG NSEC.  That means it's glue or
3402                                  * other obscured zone data that isn't
3403                                  * relevant for our search.  Treat the
3404                                  * node as if it were empty and keep looking.
3405                                  */
3406                                 empty_node = ISC_TRUE;
3407                                 result = dns_rbtnodechain_prev(&search->chain,
3408                                                                NULL, NULL);
3409                         } else {
3410                                 /*
3411                                  * We found an active node, but either the
3412                                  * NSEC or the RRSIG NSEC is missing.  This
3413                                  * shouldn't happen.
3414                                  */
3415                                 result = DNS_R_BADDB;
3416                         }
3417                 } else {
3418                         /*
3419                          * This node isn't active.  We've got to keep
3420                          * looking.
3421                          */
3422                         result = dns_rbtnodechain_prev(&search->chain, NULL,
3423                                                        NULL);
3424                 }
3425                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3426                             isc_rwlocktype_read);
3427         } while (empty_node && result == ISC_R_SUCCESS);
3428
3429         if (result == ISC_R_NOMORE && wraps) {
3430                 result = dns_rbtnodechain_last(&search->chain, tree,
3431                                                NULL, NULL);
3432                 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3433                         wraps = ISC_FALSE;
3434                         goto again;
3435                 }
3436         }
3437
3438         /*
3439          * If the result is ISC_R_NOMORE, then we got to the beginning of
3440          * the database and didn't find a NSEC record.  This shouldn't
3441          * happen.
3442          */
3443         if (result == ISC_R_NOMORE)
3444                 result = DNS_R_BADDB;
3445
3446         return (result);
3447 }
3448
3449 static isc_result_t
3450 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3451           dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3452           dns_dbnode_t **nodep, dns_name_t *foundname,
3453           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3454 {
3455         dns_rbtnode_t *node = NULL;
3456         isc_result_t result;
3457         rbtdb_search_t search;
3458         isc_boolean_t cname_ok = ISC_TRUE;
3459         isc_boolean_t close_version = ISC_FALSE;
3460         isc_boolean_t maybe_zonecut = ISC_FALSE;
3461         isc_boolean_t at_zonecut = ISC_FALSE;
3462         isc_boolean_t wild;
3463         isc_boolean_t empty_node;
3464         rdatasetheader_t *header, *header_next, *found, *nsecheader;
3465         rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3466         rbtdb_rdatatype_t sigtype;
3467         isc_boolean_t active;
3468         dns_rbtnodechain_t chain;
3469         nodelock_t *lock;
3470         dns_rbt_t *tree;
3471
3472         search.rbtdb = (dns_rbtdb_t *)db;
3473
3474         REQUIRE(VALID_RBTDB(search.rbtdb));
3475
3476         /*
3477          * We don't care about 'now'.
3478          */
3479         UNUSED(now);
3480
3481         /*
3482          * If the caller didn't supply a version, attach to the current
3483          * version.
3484          */
3485         if (version == NULL) {
3486                 currentversion(db, &version);
3487                 close_version = ISC_TRUE;
3488         }
3489
3490         search.rbtversion = version;
3491         search.serial = search.rbtversion->serial;
3492         search.options = options;
3493         search.copy_name = ISC_FALSE;
3494         search.need_cleanup = ISC_FALSE;
3495         search.wild = ISC_FALSE;
3496         search.zonecut = NULL;
3497         dns_fixedname_init(&search.zonecut_name);
3498         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3499         search.now = 0;
3500
3501         /*
3502          * 'wild' will be true iff. we've matched a wildcard.
3503          */
3504         wild = ISC_FALSE;
3505
3506         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3507
3508         /*
3509          * Search down from the root of the tree.  If, while going down, we
3510          * encounter a callback node, zone_zonecut_callback() will search the
3511          * rdatasets at the zone cut for active DNAME or NS rdatasets.
3512          */
3513         tree =  (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3514                                                          search.rbtdb->tree;
3515         result = dns_rbt_findnode(tree, name, foundname, &node,
3516                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
3517                                   zone_zonecut_callback, &search);
3518
3519         if (result == DNS_R_PARTIALMATCH) {
3520         partial_match:
3521                 if (search.zonecut != NULL) {
3522                     result = setup_delegation(&search, nodep, foundname,
3523                                               rdataset, sigrdataset);
3524                     goto tree_exit;
3525                 }
3526
3527                 if (search.wild) {
3528                         /*
3529                          * At least one of the levels in the search chain
3530                          * potentially has a wildcard.  For each such level,
3531                          * we must see if there's a matching wildcard active
3532                          * in the current version.
3533                          */
3534                         result = find_wildcard(&search, &node, name);
3535                         if (result == ISC_R_SUCCESS) {
3536                                 result = dns_name_copy(name, foundname, NULL);
3537                                 if (result != ISC_R_SUCCESS)
3538                                         goto tree_exit;
3539                                 wild = ISC_TRUE;
3540                                 goto found;
3541                         }
3542                         else if (result != ISC_R_NOTFOUND)
3543                                 goto tree_exit;
3544                 }
3545
3546                 chain = search.chain;
3547                 active = activeempty(&search, &chain, name);
3548
3549                 /*
3550                  * If we're here, then the name does not exist, is not
3551                  * beneath a zonecut, and there's no matching wildcard.
3552                  */
3553                 if ((search.rbtversion->secure == dns_db_secure &&
3554                      !search.rbtversion->havensec3) ||
3555                     (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3556                     (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3557                 {
3558                         result = find_closest_nsec(&search, nodep, foundname,
3559                                                    rdataset, sigrdataset, tree,
3560                                                    search.rbtversion->secure);
3561                         if (result == ISC_R_SUCCESS)
3562                                 result = active ? DNS_R_EMPTYNAME :
3563                                                   DNS_R_NXDOMAIN;
3564                 } else
3565                         result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3566                 goto tree_exit;
3567         } else if (result != ISC_R_SUCCESS)
3568                 goto tree_exit;
3569
3570  found:
3571         /*
3572          * We have found a node whose name is the desired name, or we
3573          * have matched a wildcard.
3574          */
3575
3576         if (search.zonecut != NULL) {
3577                 /*
3578                  * If we're beneath a zone cut, we don't want to look for
3579                  * CNAMEs because they're not legitimate zone glue.
3580                  */
3581                 cname_ok = ISC_FALSE;
3582         } else {
3583                 /*
3584                  * The node may be a zone cut itself.  If it might be one,
3585                  * make sure we check for it later.
3586                  *
3587                  * DS records live above the zone cut in ordinary zone so
3588                  * we want to ignore any referral.
3589                  *
3590          * Stub zones don't have anything "above" the delegation so
3591                  * we always return a referral.
3592                  */
3593                 if (node->find_callback &&
3594                     ((node != search.rbtdb->origin_node &&
3595                       !dns_rdatatype_atparent(type)) ||
3596                      IS_STUB(search.rbtdb)))
3597                         maybe_zonecut = ISC_TRUE;
3598         }
3599
3600         /*
3601          * Certain DNSSEC types are not subject to CNAME matching
3602          * (RFC4035, section 2.5 and RFC3007).
3603          *
3604          * We don't check for RRSIG, because we don't store RRSIG records
3605          * directly.
3606          */
3607         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3608                 cname_ok = ISC_FALSE;
3609
3610         /*
3611          * We now go looking for rdata...
3612          */
3613
3614         lock = &search.rbtdb->node_locks[node->locknum].lock;
3615         NODE_LOCK(lock, isc_rwlocktype_read);
3616
3617         found = NULL;
3618         foundsig = NULL;
3619         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3620         nsecheader = NULL;
3621         nsecsig = NULL;
3622         cnamesig = NULL;
3623         empty_node = ISC_TRUE;
3624         for (header = node->data; header != NULL; header = header_next) {
3625                 header_next = header->next;
3626                 /*
3627                  * Look for an active, extant rdataset.
3628                  */
3629                 do {
3630                         if (header->serial <= search.serial &&
3631                             !IGNORE(header)) {
3632                                 /*
3633                                  * Is this a "this rdataset doesn't
3634                                  * exist" record?
3635                                  */
3636                                 if (NONEXISTENT(header))
3637                                         header = NULL;
3638                                 break;
3639                         } else
3640                                 header = header->down;
3641                 } while (header != NULL);
3642                 if (header != NULL) {
3643                         /*
3644                          * We now know that there is at least one active
3645                          * rdataset at this node.
3646                          */
3647                         empty_node = ISC_FALSE;
3648
3649                         /*
3650                          * Do special zone cut handling, if requested.
3651                          */
3652                         if (maybe_zonecut &&
3653                             header->type == dns_rdatatype_ns) {
3654                                 /*
3655                                  * We increment the reference count on node to
3656                                  * ensure that search->zonecut_rdataset will
3657                                  * still be valid later.
3658                                  */
3659                                 new_reference(search.rbtdb, node);
3660                                 search.zonecut = node;
3661                                 search.zonecut_rdataset = header;
3662                                 search.zonecut_sigrdataset = NULL;
3663                                 search.need_cleanup = ISC_TRUE;
3664                                 maybe_zonecut = ISC_FALSE;
3665                                 at_zonecut = ISC_TRUE;
3666                                 /*
3667                                  * It is not clear if KEY should still be
3668                                  * allowed at the parent side of the zone
3669                                  * cut or not.  It is needed for RFC3007
3670                                  * validated updates.
3671                                  */
3672                                 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3673                                     && type != dns_rdatatype_nsec
3674                                     && type != dns_rdatatype_key) {
3675                                         /*
3676                                          * Glue is not OK, but any answer we
3677                                          * could return would be glue.  Return
3678                                          * the delegation.
3679                                          */
3680                                         found = NULL;
3681                                         break;
3682                                 }
3683                                 if (found != NULL && foundsig != NULL)
3684                                         break;
3685                         }
3686
3687
3688                         /*
3689                          * If the NSEC3 record doesn't match the chain
3690                          * we are using behave as if it isn't here.
3691                          */
3692                         if (header->type == dns_rdatatype_nsec3 &&
3693                            !matchparams(header, &search)) {
3694                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3695                                 goto partial_match;
3696                         }
3697                         /*
3698                          * If we found a type we were looking for,
3699                          * remember it.
3700                          */
3701                         if (header->type == type ||
3702                             type == dns_rdatatype_any ||
3703                             (header->type == dns_rdatatype_cname &&
3704                              cname_ok)) {
3705                                 /*
3706                                  * We've found the answer!
3707                                  */
3708                                 found = header;
3709                                 if (header->type == dns_rdatatype_cname &&
3710                                     cname_ok) {
3711                                         /*
3712                                          * We may be finding a CNAME instead
3713                                          * of the desired type.
3714                                          *
3715                                          * If we've already got the CNAME RRSIG,
3716                                          * use it, otherwise change sigtype
3717                                          * so that we find it.
3718                                          */
3719                                         if (cnamesig != NULL)
3720                                                 foundsig = cnamesig;
3721                                         else
3722                                                 sigtype =
3723                                                     RBTDB_RDATATYPE_SIGCNAME;
3724                                 }
3725                                 /*
3726                                  * If we've got all we need, end the search.
3727                                  */
3728                                 if (!maybe_zonecut && foundsig != NULL)
3729                                         break;
3730                         } else if (header->type == sigtype) {
3731                                 /*
3732                                  * We've found the RRSIG rdataset for our
3733                                  * target type.  Remember it.
3734                                  */
3735                                 foundsig = header;
3736                                 /*
3737                                  * If we've got all we need, end the search.
3738                                  */
3739                                 if (!maybe_zonecut && found != NULL)
3740                                         break;
3741                         } else if (header->type == dns_rdatatype_nsec &&
3742                                    !search.rbtversion->havensec3) {
3743                                 /*
3744                                  * Remember a NSEC rdataset even if we're
3745                                  * not specifically looking for it, because
3746                                  * we might need it later.
3747                                  */
3748                                 nsecheader = header;
3749                         } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3750                                    !search.rbtversion->havensec3) {
3751                                 /*
3752                                  * If we need the NSEC rdataset, we'll also
3753                                  * need its signature.
3754                                  */
3755                                 nsecsig = header;
3756                         } else if (cname_ok &&
3757                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
3758                                 /*
3759                                  * If we get a CNAME match, we'll also need
3760                                  * its signature.
3761                                  */
3762                                 cnamesig = header;
3763                         }
3764                 }
3765         }
3766
3767         if (empty_node) {
3768                 /*
3769                  * We have an exact match for the name, but there are no
3770                  * active rdatasets in the desired version.  That means that
3771                  * this node doesn't exist in the desired version, and that
3772                  * we really have a partial match.
3773                  */
3774                 if (!wild) {
3775                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3776                         goto partial_match;
3777                 }
3778         }
3779
3780         /*
3781          * If we didn't find what we were looking for...
3782          */
3783         if (found == NULL) {
3784                 if (search.zonecut != NULL) {
3785                         /*
3786                          * We were trying to find glue at a node beneath a
3787                          * zone cut, but didn't.
3788                          *
3789                          * Return the delegation.
3790                          */
3791                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3792                         result = setup_delegation(&search, nodep, foundname,
3793                                                   rdataset, sigrdataset);
3794                         goto tree_exit;
3795                 }
3796                 /*
3797                  * The desired type doesn't exist.
3798                  */
3799                 result = DNS_R_NXRRSET;
3800                 if (search.rbtversion->secure == dns_db_secure &&
3801                     !search.rbtversion->havensec3 &&
3802                     (nsecheader == NULL || nsecsig == NULL)) {
3803                         /*
3804                          * The zone is secure but there's no NSEC,
3805                          * or the NSEC has no signature!
3806                          */
3807                         if (!wild) {
3808                                 result = DNS_R_BADDB;
3809                                 goto node_exit;
3810                         }
3811
3812                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3813                         result = find_closest_nsec(&search, nodep, foundname,
3814                                                    rdataset, sigrdataset,
3815                                                    search.rbtdb->tree,
3816                                                    search.rbtversion->secure);
3817                         if (result == ISC_R_SUCCESS)
3818                                 result = DNS_R_EMPTYWILD;
3819                         goto tree_exit;
3820                 }
3821                 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3822                     nsecheader == NULL)
3823                 {
3824                         /*
3825                          * There's no NSEC record, and we were told
3826                          * to find one.
3827                          */
3828                         result = DNS_R_BADDB;
3829                         goto node_exit;
3830                 }
3831                 if (nodep != NULL) {
3832                         new_reference(search.rbtdb, node);
3833                         *nodep = node;
3834                 }
3835                 if ((search.rbtversion->secure == dns_db_secure &&
3836                      !search.rbtversion->havensec3) ||
3837                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
3838                 {
3839                         bind_rdataset(search.rbtdb, node, nsecheader,
3840                                       0, rdataset);
3841                         if (nsecsig != NULL)
3842                                 bind_rdataset(search.rbtdb, node,
3843                                               nsecsig, 0, sigrdataset);
3844                 }
3845                 if (wild)
3846                         foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3847                 goto node_exit;
3848         }
3849
3850         /*
3851          * We found what we were looking for, or we found a CNAME.
3852          */
3853
3854         if (type != found->type &&
3855             type != dns_rdatatype_any &&
3856             found->type == dns_rdatatype_cname) {
3857                 /*
3858                  * We weren't doing an ANY query and we found a CNAME instead
3859                  * of the type we were looking for, so we need to indicate
3860                  * that result to the caller.
3861                  */
3862                 result = DNS_R_CNAME;
3863         } else if (search.zonecut != NULL) {
3864                 /*
3865                  * If we're beneath a zone cut, we must indicate that the
3866                  * result is glue, unless we're actually at the zone cut
3867                  * and the type is NSEC or KEY.
3868                  */
3869                 if (search.zonecut == node) {
3870                         /*
3871                          * It is not clear if KEY should still be
3872                          * allowed at the parent side of the zone
3873                          * cut or not.  It is needed for RFC3007
3874                          * validated updates.
3875                          */
3876                         if (type == dns_rdatatype_nsec ||
3877                             type == dns_rdatatype_nsec3 ||
3878                             type == dns_rdatatype_key)
3879                                 result = ISC_R_SUCCESS;
3880                         else if (type == dns_rdatatype_any)
3881                                 result = DNS_R_ZONECUT;
3882                         else
3883                                 result = DNS_R_GLUE;
3884                 } else
3885                         result = DNS_R_GLUE;
3886                 /*
3887                  * We might have found data that isn't glue, but was occluded
3888                  * by a dynamic update.  If the caller cares about this, they
3889                  * will have told us to validate glue.
3890                  *
3891                  * XXX We should cache the glue validity state!
3892                  */
3893                 if (result == DNS_R_GLUE &&
3894                     (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
3895                     !valid_glue(&search, foundname, type, node)) {
3896                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3897                         result = setup_delegation(&search, nodep, foundname,
3898                                                   rdataset, sigrdataset);
3899                     goto tree_exit;
3900                 }
3901         } else {
3902                 /*
3903                  * An ordinary successful query!
3904                  */
3905                 result = ISC_R_SUCCESS;
3906         }
3907
3908         if (nodep != NULL) {
3909                 if (!at_zonecut)
3910                         new_reference(search.rbtdb, node);
3911                 else
3912                         search.need_cleanup = ISC_FALSE;
3913                 *nodep = node;
3914         }
3915
3916         if (type != dns_rdatatype_any) {
3917                 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
3918                 if (foundsig != NULL)
3919                         bind_rdataset(search.rbtdb, node, foundsig, 0,
3920                                       sigrdataset);
3921         }
3922
3923         if (wild)
3924                 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3925
3926  node_exit:
3927         NODE_UNLOCK(lock, isc_rwlocktype_read);
3928
3929  tree_exit:
3930         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3931
3932         /*
3933          * If we found a zonecut but aren't going to use it, we have to
3934          * let go of it.
3935          */
3936         if (search.need_cleanup) {
3937                 node = search.zonecut;
3938                 INSIST(node != NULL);
3939                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3940
3941                 NODE_LOCK(lock, isc_rwlocktype_read);
3942                 decrement_reference(search.rbtdb, node, 0,
3943                                     isc_rwlocktype_read, isc_rwlocktype_none,
3944                                     ISC_FALSE);
3945                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3946         }
3947
3948         if (close_version)
3949                 closeversion(db, &version, ISC_FALSE);
3950
3951         dns_rbtnodechain_reset(&search.chain);
3952
3953         return (result);
3954 }
3955
3956 static isc_result_t
3957 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
3958                  isc_stdtime_t now, dns_dbnode_t **nodep,
3959                  dns_name_t *foundname,
3960                  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3961 {
3962         UNUSED(db);
3963         UNUSED(name);
3964         UNUSED(options);
3965         UNUSED(now);
3966         UNUSED(nodep);
3967         UNUSED(foundname);
3968         UNUSED(rdataset);
3969         UNUSED(sigrdataset);
3970
3971         FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
3972
3973         return (ISC_R_NOTIMPLEMENTED);
3974 }
3975
3976 static isc_result_t
3977 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
3978         rbtdb_search_t *search = arg;
3979         rdatasetheader_t *header, *header_prev, *header_next;
3980         rdatasetheader_t *dname_header, *sigdname_header;
3981         isc_result_t result;
3982         nodelock_t *lock;
3983         isc_rwlocktype_t locktype;
3984
3985         /* XXX comment */
3986
3987         REQUIRE(search->zonecut == NULL);
3988
3989         /*
3990          * Keep compiler silent.
3991          */
3992         UNUSED(name);
3993
3994         lock = &(search->rbtdb->node_locks[node->locknum].lock);
3995         locktype = isc_rwlocktype_read;
3996         NODE_LOCK(lock, locktype);
3997
3998         /*
3999          * Look for a DNAME or RRSIG DNAME rdataset.
4000          */
4001         dname_header = NULL;
4002         sigdname_header = NULL;
4003         header_prev = NULL;
4004         for (header = node->data; header != NULL; header = header_next) {
4005                 header_next = header->next;
4006                 if (header->rdh_ttl <= search->now) {
4007                         /*
4008                          * This rdataset is stale.  If no one else is
4009                          * using the node, we can clean it up right
4010                          * now, otherwise we mark it as stale, and
4011                          * the node as dirty, so it will get cleaned
4012                          * up later.
4013                          */
4014                         if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
4015                             (locktype == isc_rwlocktype_write ||
4016                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4017                                 /*
4018                                  * We update the node's status only when we
4019                                  * can get write access; otherwise, we leave
4020                                  * others to this work.  Periodical cleaning
4021                                  * will eventually take the job as the last
4022                                  * resort.
4023                                  * We won't downgrade the lock, since other
4024                                  * rdatasets are probably stale, too.
4025                                  */
4026                                 locktype = isc_rwlocktype_write;
4027
4028                                 if (dns_rbtnode_refcurrent(node) == 0) {
4029                                         isc_mem_t *mctx;
4030
4031                                         /*
4032                                          * header->down can be non-NULL if the
4033                                          * refcount has just decremented to 0
4034                                          * but decrement_reference() has not
4035                                          * performed clean_cache_node(), in
4036                                          * which case we need to purge the
4037                                          * stale headers first.
4038                                          */
4039                                         mctx = search->rbtdb->common.mctx;
4040                                         clean_stale_headers(search->rbtdb,
4041                                                             mctx,
4042                                                             header);
4043                                         if (header_prev != NULL)
4044                                                 header_prev->next =
4045                                                         header->next;
4046                                         else
4047                                                 node->data = header->next;
4048                                         free_rdataset(search->rbtdb, mctx,
4049                                                       header);
4050                                 } else {
4051                                         header->attributes |=
4052                                                 RDATASET_ATTR_STALE;
4053                                         node->dirty = 1;
4054                                         header_prev = header;
4055                                 }
4056                         } else
4057                                 header_prev = header;
4058                 } else if (header->type == dns_rdatatype_dname &&
4059                            EXISTS(header)) {
4060                         dname_header = header;
4061                         header_prev = header;
4062                 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
4063                          EXISTS(header)) {
4064                         sigdname_header = header;
4065                         header_prev = header;
4066                 } else
4067                         header_prev = header;
4068         }
4069
4070         if (dname_header != NULL &&
4071             (!DNS_TRUST_PENDING(dname_header->trust) ||
4072              (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4073                 /*
4074                  * We increment the reference count on node to ensure that
4075                  * search->zonecut_rdataset will still be valid later.
4076                  */
4077                 new_reference(search->rbtdb, node);
4078                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4079                 search->zonecut = node;
4080                 search->zonecut_rdataset = dname_header;
4081                 search->zonecut_sigrdataset = sigdname_header;
4082                 search->need_cleanup = ISC_TRUE;
4083                 result = DNS_R_PARTIALMATCH;
4084         } else
4085                 result = DNS_R_CONTINUE;
4086
4087         NODE_UNLOCK(lock, locktype);
4088
4089         return (result);
4090 }
4091
4092 static inline isc_result_t
4093 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4094                      dns_dbnode_t **nodep, dns_name_t *foundname,
4095                      dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4096 {
4097         unsigned int i;
4098         dns_rbtnode_t *level_node;
4099         rdatasetheader_t *header, *header_prev, *header_next;
4100         rdatasetheader_t *found, *foundsig;
4101         isc_result_t result = ISC_R_NOTFOUND;
4102         dns_name_t name;
4103         dns_rbtdb_t *rbtdb;
4104         isc_boolean_t done;
4105         nodelock_t *lock;
4106         isc_rwlocktype_t locktype;
4107
4108         /*
4109          * Caller must be holding the tree lock.
4110          */
4111
4112         rbtdb = search->rbtdb;
4113         i = search->chain.level_matches;
4114         done = ISC_FALSE;
4115         do {
4116                 locktype = isc_rwlocktype_read;
4117                 lock = &rbtdb->node_locks[node->locknum].lock;
4118                 NODE_LOCK(lock, locktype);
4119
4120                 /*
4121                  * Look for NS and RRSIG NS rdatasets.
4122                  */
4123                 found = NULL;
4124                 foundsig = NULL;
4125                 header_prev = NULL;
4126                 for (header = node->data;
4127                      header != NULL;
4128                      header = header_next) {
4129                         header_next = header->next;
4130                         if (header->rdh_ttl <= search->now) {
4131                                 /*
4132                                  * This rdataset is stale.  If no one else is
4133                                  * using the node, we can clean it up right
4134                                  * now, otherwise we mark it as stale, and
4135                                  * the node as dirty, so it will get cleaned
4136                                  * up later.
4137                                  */
4138                                 if ((header->rdh_ttl <= search->now -
4139                                                     RBTDB_VIRTUAL) &&
4140                                     (locktype == isc_rwlocktype_write ||
4141                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4142                                         /*
4143                                          * We update the node's status only
4144                                          * when we can get write access.
4145                                          */
4146                                         locktype = isc_rwlocktype_write;
4147
4148                                         if (dns_rbtnode_refcurrent(node)
4149                                             == 0) {
4150                                                 isc_mem_t *m;
4151
4152                                                 m = search->rbtdb->common.mctx;
4153                                                 clean_stale_headers(
4154                                                         search->rbtdb,
4155                                                         m, header);
4156                                                 if (header_prev != NULL)
4157                                                         header_prev->next =
4158                                                                 header->next;
4159                                                 else
4160                                                         node->data =
4161                                                                 header->next;
4162                                                 free_rdataset(rbtdb, m,
4163                                                               header);
4164                                         } else {
4165                                                 header->attributes |=
4166                                                         RDATASET_ATTR_STALE;
4167                                                 node->dirty = 1;
4168                                                 header_prev = header;
4169                                         }
4170                                 } else
4171                                         header_prev = header;
4172                         } else if (EXISTS(header)) {
4173                                 /*
4174                                  * We've found an extant rdataset.  See if
4175                                  * we're interested in it.
4176                                  */
4177                                 if (header->type == dns_rdatatype_ns) {
4178                                         found = header;
4179                                         if (foundsig != NULL)
4180                                                 break;
4181                                 } else if (header->type ==
4182                                            RBTDB_RDATATYPE_SIGNS) {
4183                                         foundsig = header;
4184                                         if (found != NULL)
4185                                                 break;
4186                                 }
4187                                 header_prev = header;
4188                         } else
4189                                 header_prev = header;
4190                 }
4191
4192                 if (found != NULL) {
4193                         /*
4194                          * If we have to set foundname, we do it before
4195                          * anything else.  If we were to set foundname after
4196                          * we had set nodep or bound the rdataset, then we'd
4197                          * have to undo that work if dns_name_concatenate()
4198                          * failed.  By setting foundname first, there's
4199                          * nothing to undo if we have trouble.
4200                          */
4201                         if (foundname != NULL) {
4202                                 dns_name_init(&name, NULL);
4203                                 dns_rbt_namefromnode(node, &name);
4204                                 result = dns_name_copy(&name, foundname, NULL);
4205                                 while (result == ISC_R_SUCCESS && i > 0) {
4206                                         i--;
4207                                         level_node = search->chain.levels[i];
4208                                         dns_name_init(&name, NULL);
4209                                         dns_rbt_namefromnode(level_node,
4210                                                              &name);
4211                                         result =
4212                                                 dns_name_concatenate(foundname,
4213                                                                      &name,
4214                                                                      foundname,
4215                                                                      NULL);
4216                                 }
4217                                 if (result != ISC_R_SUCCESS) {
4218                                         *nodep = NULL;
4219                                         goto node_exit;
4220                                 }
4221                         }
4222                         result = DNS_R_DELEGATION;
4223                         if (nodep != NULL) {
4224                                 new_reference(search->rbtdb, node);
4225                                 *nodep = node;
4226                         }
4227                         bind_rdataset(search->rbtdb, node, found, search->now,
4228                                       rdataset);
4229                         if (foundsig != NULL)
4230                                 bind_rdataset(search->rbtdb, node, foundsig,
4231                                               search->now, sigrdataset);
4232                         if (need_headerupdate(found, search->now) ||
4233                             (foundsig != NULL &&
4234                              need_headerupdate(foundsig, search->now))) {
4235                                 if (locktype != isc_rwlocktype_write) {
4236                                         NODE_UNLOCK(lock, locktype);
4237                                         NODE_LOCK(lock, isc_rwlocktype_write);
4238                                         locktype = isc_rwlocktype_write;
4239                                         POST(locktype);
4240                                 }
4241                                 if (need_headerupdate(found, search->now))
4242                                         update_header(search->rbtdb, found,
4243                                                       search->now);
4244                                 if (foundsig != NULL &&
4245                                     need_headerupdate(foundsig, search->now)) {
4246                                         update_header(search->rbtdb, foundsig,
4247                                                       search->now);
4248                                 }
4249                         }
4250                 }
4251
4252         node_exit:
4253                 NODE_UNLOCK(lock, locktype);
4254
4255                 if (found == NULL && i > 0) {
4256                         i--;
4257                         node = search->chain.levels[i];
4258                 } else
4259                         done = ISC_TRUE;
4260
4261         } while (!done);
4262
4263         return (result);
4264 }
4265
/*
 * Look for an NSEC rdataset that may cover the name being searched for.
 *
 * Starting with the node at the current position of search->chain, each
 * node is examined under its node lock:
 *
 *   - if it holds a live NSEC rdataset the walk ends: 'foundname' receives
 *     the node's name concatenated with its origin, 'rdataset' (and
 *     'sigrdataset', when an RRSIG(NSEC) header is present) are bound, and
 *     DNS_R_COVERINGNSEC is returned;
 *   - if it holds other live data but no NSEC the walk ends with
 *     ISC_R_NOTFOUND;
 *   - if it holds no live data it is skipped and the walk moves to the
 *     predecessor returned by dns_rbtnodechain_prev().
 *
 * "Live" here means: TTL later than 'now', not NONEXISTENT, and a non-zero
 * base type.  Failures from dns_rbtnodechain_current(),
 * dns_name_concatenate() and dns_rbtnodechain_prev() are returned as is.
 *
 * As a side effect, expired headers met during the scan are freed or
 * flagged stale when the node lock can be upgraded to a write lock.
 *
 * 'nodep' may be NULL, in which case no node reference is handed back.
 * cache_find() calls this while holding the tree lock for reading.
 */
4266 static isc_result_t
4267 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4268                   isc_stdtime_t now, dns_name_t *foundname,
4269                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4270 {
4271         dns_rbtnode_t *node;
4272         rdatasetheader_t *header, *header_next, *header_prev;
4273         rdatasetheader_t *found, *foundsig;
4274         isc_boolean_t empty_node;
4275         isc_result_t result;
4276         dns_fixedname_t fname, forigin;
4277         dns_name_t *name, *origin;
4278         rbtdb_rdatatype_t matchtype, sigmatchtype;
4279         nodelock_t *lock;
4280         isc_rwlocktype_t locktype;
4281
4282         matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4283         sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4284                                              dns_rdatatype_nsec);
4285
4286         do {
4287                 node = NULL;
4288                 dns_fixedname_init(&fname);
4289                 name = dns_fixedname_name(&fname);
4290                 dns_fixedname_init(&forigin);
4291                 origin = dns_fixedname_name(&forigin);
4292                 result = dns_rbtnodechain_current(&search->chain, name,
4293                                                   origin, &node);
4294                 if (result != ISC_R_SUCCESS)
4295                         return (result);
                     /*
                      * Start with a read lock; 'locktype' tracks whether it
                      * has been upgraded so the matching unlock is used.
                      */
4296                 locktype = isc_rwlocktype_read;
4297                 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4298                 NODE_LOCK(lock, locktype);
4299                 found = NULL;
4300                 foundsig = NULL;
4301                 empty_node = ISC_TRUE;
4302                 header_prev = NULL;
4303                 for (header = node->data;
4304                      header != NULL;
4305                      header = header_next) {
                             /*
                              * Saved up front: 'header' may be unlinked
                              * and freed below.
                              */
4306                         header_next = header->next;
4307                         if (header->rdh_ttl <= now) {
4308                                 /*
4309                                  * This rdataset is stale.  If no one else is
4310                                  * using the node, we can clean it up right
4311                                  * now, otherwise we mark it as stale, and the
4312                                  * node as dirty, so it will get cleaned up
4313                                  * later.
4314                                  */
4315                                 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4316                                     (locktype == isc_rwlocktype_write ||
4317                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4318                                         /*
4319                                          * We update the node's status only
4320                                          * when we can get write access.
4321                                          */
4322                                         locktype = isc_rwlocktype_write;
4323
4324                                         if (dns_rbtnode_refcurrent(node)
4325                                             == 0) {
4326                                                 isc_mem_t *m;
4327
4328                                                 m = search->rbtdb->common.mctx;
4329                                                 clean_stale_headers(
4330                                                         search->rbtdb,
4331                                                         m, header);
                                                     /*
                                                      * Unlink before freeing;
                                                      * 'header_prev' is left
                                                      * untouched because it
                                                      * is still the live
                                                      * predecessor.
                                                      */
4332                                                 if (header_prev != NULL)
4333                                                         header_prev->next =
4334                                                                 header->next;
4335                                                 else
4336                                                         node->data = header->next;
4337                                                 free_rdataset(search->rbtdb, m,
4338                                                               header);
4339                                         } else {
4340                                                 header->attributes |=
4341                                                         RDATASET_ATTR_STALE;
4342                                                 node->dirty = 1;
4343                                                 header_prev = header;
4344                                         }
4345                                 } else
4346                                         header_prev = header;
4347                                 continue;
4348                         }
                             /*
                              * Headers marked nonexistent, or whose base type
                              * is 0, do not count as live data.
                              * NOTE(review): base type 0 appears to be how
                              * negative cache entries are encoded (compare
                              * 'negtype' in cache_find()) -- confirm.
                              */
4349                         if (NONEXISTENT(header) ||
4350                             RBTDB_RDATATYPE_BASE(header->type) == 0) {
4351                                 header_prev = header;
4352                                 continue;
4353                         }
4354                         empty_node = ISC_FALSE;
4355                         if (header->type == matchtype)
4356                                 found = header;
4357                         else if (header->type == sigmatchtype)
4358                                 foundsig = header;
4359                         header_prev = header;
4360                 }
4361                 if (found != NULL) {
                             /*
                              * 'found' implies !empty_node, so leaving via
                              * unlock_node on failure also ends the loop.
                              */
4362                         result = dns_name_concatenate(name, origin,
4363                                                       foundname, NULL);
4364                         if (result != ISC_R_SUCCESS)
4365                                 goto unlock_node;
4366                         bind_rdataset(search->rbtdb, node, found,
4367                                       now, rdataset);
4368                         if (foundsig != NULL)
4369                                 bind_rdataset(search->rbtdb, node, foundsig,
4370                                               now, sigrdataset);
                             /*
                              * 'nodep' is optional, as it is for the other
                              * cache lookups in this file; only take and hand
                              * out a node reference when the caller asked
                              * for one.
                              */
                             if (nodep != NULL) {
4371                                 new_reference(search->rbtdb, node);
4372                                 *nodep = node;
                             }
4373                         result = DNS_R_COVERINGNSEC;
4374                 } else if (!empty_node) {
4375                         result = ISC_R_NOTFOUND;
4376                 } else
4377                         result = dns_rbtnodechain_prev(&search->chain, NULL,
4378                                                        NULL);
4379  unlock_node:
4380                 NODE_UNLOCK(lock, locktype);
4381         } while (empty_node && result == ISC_R_SUCCESS);
4382         return (result);
4383 }
4384
/*
 * Look up 'name' / 'type' in the cache database.
 *
 * 'version' must be NULL.  A 'now' of 0 means "use the current time".
 *
 * The tree is searched while holding the tree lock for reading.  On a
 * partial match the function tries, in this order: a covering NSEC (only
 * when DNS_DBFIND_COVERINGNSEC is set in 'options'), the zone cut recorded
 * in search.zonecut during the tree walk, and finally
 * find_deepest_zonecut().  Any other non-success result from
 * dns_rbt_findnode() is returned unchanged.
 *
 * On an exact match the node's rdataset headers are scanned under the
 * node lock and the result is one of:
 *
 *   ISC_R_SUCCESS          the requested type was found (for an ANY query,
 *                          nothing is bound to 'rdataset' in this case)
 *   DNS_R_CNAME            a CNAME was found instead of the requested type
 *   DNS_R_NCACHENXDOMAIN,
 *   DNS_R_NCACHENXRRSET    a negative cache entry was found
 *   DNS_R_DELEGATION       no acceptable answer, but the node has an NS
 *                          rdataset, which is bound instead
 *
 * If the node has no live rdatasets, or has neither an acceptable answer
 * nor an NS rdataset, the lookup falls back to find_deepest_zonecut().
 *
 * When 'nodep' is not NULL and a node is returned, the caller receives a
 * reference to it.
 */
4385 static isc_result_t
4386 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4387            dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4388            dns_dbnode_t **nodep, dns_name_t *foundname,
4389            dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4390 {
4391         dns_rbtnode_t *node = NULL;
4392         isc_result_t result;
4393         rbtdb_search_t search;
4394         isc_boolean_t cname_ok = ISC_TRUE;
4395         isc_boolean_t empty_node;
4396         nodelock_t *lock;
4397         isc_rwlocktype_t locktype;
4398         rdatasetheader_t *header, *header_prev, *header_next;
4399         rdatasetheader_t *found, *nsheader;
4400         rdatasetheader_t *foundsig, *nssig, *cnamesig;
4401         rdatasetheader_t *update, *updatesig;
4402         rbtdb_rdatatype_t sigtype, negtype;
4403
4404         UNUSED(version);
4405
4406         search.rbtdb = (dns_rbtdb_t *)db;
4407
4408         REQUIRE(VALID_RBTDB(search.rbtdb));
4409         REQUIRE(version == NULL);
4410
4411         if (now == 0)
4412                 isc_stdtime_get(&now);
4413
4414         search.rbtversion = NULL;
4415         search.serial = 1;
4416         search.options = options;
4417         search.copy_name = ISC_FALSE;
4418         search.need_cleanup = ISC_FALSE;
4419         search.wild = ISC_FALSE;
4420         search.zonecut = NULL;
4421         dns_fixedname_init(&search.zonecut_name);
4422         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4423         search.now = now;
             /*
              * Headers that turn out to need update_header() are remembered
              * here and processed at node_exit.
              */
4424         update = NULL;
4425         updatesig = NULL;
4426
4427         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4428
4429         /*
4430          * Search down from the root of the tree.  If, while going down, we
4431          * encounter a callback node, cache_zonecut_callback() will search the
4432          * rdatasets at the zone cut for a DNAME rdataset.
4433          */
4434         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4435                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
4436                                   cache_zonecut_callback, &search);
4437
4438         if (result == DNS_R_PARTIALMATCH) {
4439                 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4440                         result = find_coveringnsec(&search, nodep, now,
4441                                                    foundname, rdataset,
4442                                                    sigrdataset);
4443                         if (result == DNS_R_COVERINGNSEC)
4444                                 goto tree_exit;
4445                 }
4446                 if (search.zonecut != NULL) {
4447                     result = setup_delegation(&search, nodep, foundname,
4448                                               rdataset, sigrdataset);
4449                     goto tree_exit;
4450                 } else {
                     /*
                      * Also entered by goto from the exact-match code below,
                      * after the node lock has been released, when the node
                      * turns out to hold nothing usable.
                      */
4451                 find_ns:
4452                         result = find_deepest_zonecut(&search, node, nodep,
4453                                                       foundname, rdataset,
4454                                                       sigrdataset);
4455                         goto tree_exit;
4456                 }
4457         } else if (result != ISC_R_SUCCESS)
4458                 goto tree_exit;
4459
4460         /*
4461          * Certain DNSSEC types are not subject to CNAME matching
4462          * (RFC4035, section 2.5 and RFC3007).
4463          *
4464          * We don't check for RRSIG, because we don't store RRSIG records
4465          * directly.
4466          */
4467         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4468                 cname_ok = ISC_FALSE;
4469
4470         /*
4471          * We now go looking for rdata...
4472          */
4473
             /*
              * 'locktype' tracks the lock mode actually held; it becomes
              * write if NODE_TRYUPGRADE() succeeds during the scan.
              */
4474         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4475         locktype = isc_rwlocktype_read;
4476         NODE_LOCK(lock, locktype);
4477
4478         found = NULL;
4479         foundsig = NULL;
4480         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4481         negtype = RBTDB_RDATATYPE_VALUE(0, type);
4482         nsheader = NULL;
4483         nssig = NULL;
4484         cnamesig = NULL;
4485         empty_node = ISC_TRUE;
4486         header_prev = NULL;
4487         for (header = node->data; header != NULL; header = header_next) {
                     /*
                      * Saved up front: 'header' may be unlinked and freed
                      * below.
                      */
4488                 header_next = header->next;
4489                 if (header->rdh_ttl <= now) {
4490                         /*
4491                          * This rdataset is stale.  If no one else is using the
4492                          * node, we can clean it up right now, otherwise we
4493                          * mark it as stale, and the node as dirty, so it will
4494                          * get cleaned up later.
4495                          */
4496                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4497                             (locktype == isc_rwlocktype_write ||
4498                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4499                                 /*
4500                                  * We update the node's status only when we
4501                                  * can get write access.
4502                                  */
4503                                 locktype = isc_rwlocktype_write;
4504
4505                                 if (dns_rbtnode_refcurrent(node) == 0) {
4506                                         isc_mem_t *mctx;
4507
4508                                         mctx = search.rbtdb->common.mctx;
4509                                         clean_stale_headers(search.rbtdb, mctx,
4510                                                             header);
                                             /*
                                              * Unlink before freeing;
                                              * 'header_prev' stays put
                                              * because it is still the
                                              * live predecessor.
                                              */
4511                                         if (header_prev != NULL)
4512                                                 header_prev->next =
4513                                                         header->next;
4514                                         else
4515                                                 node->data = header->next;
4516                                         free_rdataset(search.rbtdb, mctx,
4517                                                       header);
4518                                 } else {
4519                                         header->attributes |=
4520                                                 RDATASET_ATTR_STALE;
4521                                         node->dirty = 1;
4522                                         header_prev = header;
4523                                 }
4524                         } else
4525                                 header_prev = header;
4526                 } else if (EXISTS(header)) {
4527                         /*
4528                          * We now know that there is at least one active
4529                          * non-stale rdataset at this node.
4530                          */
4531                         empty_node = ISC_FALSE;
4532
4533                         /*
4534                          * If we found a type we were looking for, remember
4535                          * it.
4536                          */
4537                         if (header->type == type ||
4538                             (type == dns_rdatatype_any &&
4539                              RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4540                             (cname_ok && header->type ==
4541                              dns_rdatatype_cname)) {
4542                                 /*
4543                                  * We've found the answer.
4544                                  */
4545                                 found = header;
4546                                 if (header->type == dns_rdatatype_cname &&
4547                                     cname_ok &&
4548                                     cnamesig != NULL) {
4549                                         /*
4550                                          * If we've already got the
4551                                          * CNAME RRSIG, use it.
4552                                          */
4553                                         foundsig = cnamesig;
4554                                 }
4555                         } else if (header->type == sigtype) {
4556                                 /*
4557                                  * We've found the RRSIG rdataset for our
4558                                  * target type.  Remember it.
4559                                  */
4560                                 foundsig = header;
4561                         } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4562                                    header->type == negtype) {
4563                                 /*
4564                                  * We've found a negative cache entry.
4565                                  */
4566                                 found = header;
4567                         } else if (header->type == dns_rdatatype_ns) {
4568                                 /*
4569                                  * Remember a NS rdataset even if we're
4570                                  * not specifically looking for it, because
4571                                  * we might need it later.
4572                                  */
4573                                 nsheader = header;
4574                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4575                                 /*
4576                                  * If we need the NS rdataset, we'll also
4577                                  * need its signature.
4578                                  */
4579                                 nssig = header;
4580                         } else if (cname_ok &&
4581                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
4582                                 /*
4583                                  * If we get a CNAME match, we'll also need
4584                                  * its signature.
4585                                  */
4586                                 cnamesig = header;
4587                         }
4588                         header_prev = header;
4589                 } else
4590                         header_prev = header;
4591         }
4592
4593         if (empty_node) {
4594                 /*
4595                  * We have an exact match for the name, but there are no
4596                  * extant rdatasets.  That means that this node doesn't
4597                  * meaningfully exist, and that we really have a partial match.
4598                  */
4599                 NODE_UNLOCK(lock, locktype);
4600                 goto find_ns;
4601         }
4602
4603         /*
4604          * If we didn't find what we were looking for...
4605          */
             /*
              * ...or what we found has a trust level (additional, glue or
              * pending) that the caller did not allow through the
              * corresponding DNS_DBFIND_*OK option.
              */
4606         if (found == NULL ||
4607             (DNS_TRUST_ADDITIONAL(found->trust) &&
4608              ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4609             (found->trust == dns_trust_glue &&
4610              ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4611             (DNS_TRUST_PENDING(found->trust) &&
4612              ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4613                 /*
4614                  * If there is an NS rdataset at this node, then this is the
4615                  * deepest zone cut.
4616                  */
4617                 if (nsheader != NULL) {
4618                         if (nodep != NULL) {
4619                                 new_reference(search.rbtdb, node);
4620                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4621                                 *nodep = node;
4622                         }
4623                         bind_rdataset(search.rbtdb, node, nsheader, search.now,
4624                                       rdataset);
4625                         if (need_headerupdate(nsheader, search.now))
4626                                 update = nsheader;
4627                         if (nssig != NULL) {
4628                                 bind_rdataset(search.rbtdb, node, nssig,
4629                                               search.now, sigrdataset);
4630                                 if (need_headerupdate(nssig, search.now))
4631                                         updatesig = nssig;
4632                         }
4633                         result = DNS_R_DELEGATION;
4634                         goto node_exit;
4635                 }
4636
4637                 /*
4638                  * Go find the deepest zone cut.
4639                  */
4640                 NODE_UNLOCK(lock, locktype);
4641                 goto find_ns;
4642         }
4643
4644         /*
4645          * We found what we were looking for, or we found a CNAME.
4646          */
4647
4648         if (nodep != NULL) {
4649                 new_reference(search.rbtdb, node);
4650                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4651                 *nodep = node;
4652         }
4653
4654         if (NEGATIVE(found)) {
4655                 /*
4656                  * We found a negative cache entry.
4657                  */
4658                 if (NXDOMAIN(found))
4659                         result = DNS_R_NCACHENXDOMAIN;
4660                 else
4661                         result = DNS_R_NCACHENXRRSET;
4662         } else if (type != found->type &&
4663                    type != dns_rdatatype_any &&
4664                    found->type == dns_rdatatype_cname) {
4665                 /*
4666                  * We weren't doing an ANY query and we found a CNAME instead
4667                  * of the type we were looking for, so we need to indicate
4668                  * that result to the caller.
4669                  */
4670                 result = DNS_R_CNAME;
4671         } else {
4672                 /*
4673                  * An ordinary successful query!
4674                  */
4675                 result = ISC_R_SUCCESS;
4676         }
4677
             /*
              * A successful or CNAME result for an ANY query binds nothing;
              * only a negative cache entry is bound in that case.
              */
4678         if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
4679             result == DNS_R_NCACHENXRRSET) {
4680                 bind_rdataset(search.rbtdb, node, found, search.now,
4681                               rdataset);
4682                 if (need_headerupdate(found, search.now))
4683                         update = found;
4684                 if (foundsig != NULL) {
4685                         bind_rdataset(search.rbtdb, node, foundsig, search.now,
4686                                       sigrdataset);
4687                         if (need_headerupdate(foundsig, search.now))
4688                                 updatesig = foundsig;
4689                 }
4690         }
4691
4692  node_exit:
             /*
              * update_header() is only called with the node write lock held.
              * The lock is dropped and re-taken rather than upgraded, so
              * need_headerupdate() is evaluated again afterwards
              * (presumably because the header may have been updated by
              * someone else while the lock was not held).
              */
4693         if ((update != NULL || updatesig != NULL) &&
4694             locktype != isc_rwlocktype_write) {
4695                 NODE_UNLOCK(lock, locktype);
4696                 NODE_LOCK(lock, isc_rwlocktype_write);
4697                 locktype = isc_rwlocktype_write;
4698                 POST(locktype);
4699         }
4700         if (update != NULL && need_headerupdate(update, search.now))
4701                 update_header(search.rbtdb, update, search.now);
4702         if (updatesig != NULL && need_headerupdate(updatesig, search.now))
4703                 update_header(search.rbtdb, updatesig, search.now);
4704
4705         NODE_UNLOCK(lock, locktype);
4706
4707  tree_exit:
4708         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4709
4710         /*
4711          * If we found a zonecut but aren't going to use it, we have to
4712          * let go of it.
4713          */
4714         if (search.need_cleanup) {
4715                 node = search.zonecut;
4716                 INSIST(node != NULL);
4717                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4718
4719                 NODE_LOCK(lock, isc_rwlocktype_read);
4720                 decrement_reference(search.rbtdb, node, 0,
4721                                     isc_rwlocktype_read, isc_rwlocktype_none,
4722                                     ISC_FALSE);
4723                 NODE_UNLOCK(lock, isc_rwlocktype_read);
4724         }
4725
4726         dns_rbtnodechain_reset(&search.chain);
4727
4728         return (result);
4729 }
4730
/*
 * Find the deepest zone cut known to the cache for 'name', i.e. the
 * closest node at or above 'name' that holds a live NS rdataset.
 *
 * A 'now' of 0 means "use the current time".  DNS_DBFIND_NOEXACT in
 * 'options' is passed on to the tree search as DNS_RBTFIND_NOEXACT.
 *
 * On an exact match the node itself is checked for an NS rdataset under
 * the node lock; if there is none, or on a partial match,
 * find_deepest_zonecut() continues the search.  When an NS rdataset is
 * found it is bound to 'rdataset' (and its RRSIG, if present, to
 * 'sigrdataset'), and a reference to the node is stored in '*nodep' when
 * 'nodep' is not NULL.
 *
 * A DNS_R_DELEGATION result is reported to the caller as ISC_R_SUCCESS;
 * every other result is returned unchanged.
 */
4731 static isc_result_t
4732 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4733                   isc_stdtime_t now, dns_dbnode_t **nodep,
4734                   dns_name_t *foundname,
4735                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4736 {
4737         dns_rbtnode_t *node = NULL;
4738         nodelock_t *lock;
4739         isc_result_t result;
4740         rbtdb_search_t search;
4741         rdatasetheader_t *header, *header_prev, *header_next;
4742         rdatasetheader_t *found, *foundsig;
4743         unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
4744         isc_rwlocktype_t locktype;
4745
4746         search.rbtdb = (dns_rbtdb_t *)db;
4747
4748         REQUIRE(VALID_RBTDB(search.rbtdb));
4749
4750         if (now == 0)
4751                 isc_stdtime_get(&now);
4752
4753         search.rbtversion = NULL;
4754         search.serial = 1;
4755         search.options = options;
4756         search.copy_name = ISC_FALSE;
4757         search.need_cleanup = ISC_FALSE;
4758         search.wild = ISC_FALSE;
4759         search.zonecut = NULL;
4760         dns_fixedname_init(&search.zonecut_name);
4761         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4762         search.now = now;
4763
4764         if ((options & DNS_DBFIND_NOEXACT) != 0)
4765                 rbtoptions |= DNS_RBTFIND_NOEXACT;
4766
4767         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4768
4769         /*
4770          * Search down from the root of the tree.
4771          */
             /*
              * Unlike cache_find(), no callback is passed to the tree
              * search here.
              */
4772         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4773                                   &search.chain, rbtoptions, NULL, &search);
4774
4775         if (result == DNS_R_PARTIALMATCH) {
             /*
              * Also entered by goto from below, after the node lock has
              * been released, when the matched node has no NS rdataset.
              */
4776         find_ns:
4777                 result = find_deepest_zonecut(&search, node, nodep, foundname,
4778                                               rdataset, sigrdataset);
4779                 goto tree_exit;
4780         } else if (result != ISC_R_SUCCESS)
4781                 goto tree_exit;
4782
4783         /*
4784          * We now go looking for an NS rdataset at the node.
4785          */
4786
             /*
              * 'locktype' tracks the lock mode actually held; it becomes
              * write if NODE_TRYUPGRADE() succeeds during the scan.
              */
4787         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4788         locktype = isc_rwlocktype_read;
4789         NODE_LOCK(lock, locktype);
4790
4791         found = NULL;
4792         foundsig = NULL;
4793         header_prev = NULL;
4794         for (header = node->data; header != NULL; header = header_next) {
                     /*
                      * Saved up front: 'header' may be unlinked and freed
                      * below.
                      */
4795                 header_next = header->next;
4796                 if (header->rdh_ttl <= now) {
4797                         /*
4798                          * This rdataset is stale.  If no one else is using the
4799                          * node, we can clean it up right now, otherwise we
4800                          * mark it as stale, and the node as dirty, so it will
4801                          * get cleaned up later.
4802                          */
4803                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4804                             (locktype == isc_rwlocktype_write ||
4805                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4806                                 /*
4807                                  * We update the node's status only when we
4808                                  * can get write access.
4809                                  */
4810                                 locktype = isc_rwlocktype_write;
4811
4812                                 if (dns_rbtnode_refcurrent(node) == 0) {
4813                                         isc_mem_t *mctx;
4814
4815                                         mctx = search.rbtdb->common.mctx;
4816                                         clean_stale_headers(search.rbtdb, mctx,
4817                                                             header);
                                             /*
                                              * Unlink before freeing;
                                              * 'header_prev' stays put
                                              * because it is still the
                                              * live predecessor.
                                              */
4818                                         if (header_prev != NULL)
4819                                                 header_prev->next =
4820                                                         header->next;
4821                                         else
4822                                                 node->data = header->next;
4823                                         free_rdataset(search.rbtdb, mctx,
4824                                                       header);
4825                                 } else {
4826                                         header->attributes |=
4827                                                 RDATASET_ATTR_STALE;
4828                                         node->dirty = 1;
4829                                         header_prev = header;
4830                                 }
4831                         } else
4832                                 header_prev = header;
4833                 } else if (EXISTS(header)) {
4834                         /*
4835                          * Only the NS rdataset and its RRSIG are of
4836                          * interest here; remember them when we see them.
4837                          */
4838                         if (header->type == dns_rdatatype_ns) {
4839                                 /*
4840                                  * This is the NS rdataset we are
4841                                  * looking for; it marks a zone cut
4842                                  * at this node.
4843                                  */
4844                                 found = header;
4845                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4846                                 /*
4847                                  * The signature covering the NS
4848                                  * rdataset; returned alongside it.
4849                                  */
4850                                 foundsig = header;
4851                         }
4852                         header_prev = header;
4853                 } else
4854                         header_prev = header;
4855         }
4856
4857         if (found == NULL) {
4858                 /*
4859                  * No NS records here.
4860                  */
4861                 NODE_UNLOCK(lock, locktype);
4862                 goto find_ns;
4863         }
4864
4865         if (nodep != NULL) {
4866                 new_reference(search.rbtdb, node);
4867                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4868                 *nodep = node;
4869         }
4870
4871         bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
4872         if (foundsig != NULL)
4873                 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4874                               sigrdataset);
4875
             /*
              * update_header() is only called with the node write lock held.
              * The lock is dropped and re-taken rather than upgraded, so
              * need_headerupdate() is evaluated again afterwards
              * (presumably because the header may have been updated by
              * someone else while the lock was not held).
              */
4876         if (need_headerupdate(found, search.now) ||
4877             (foundsig != NULL &&  need_headerupdate(foundsig, search.now))) {
4878                 if (locktype != isc_rwlocktype_write) {
4879                         NODE_UNLOCK(lock, locktype);
4880                         NODE_LOCK(lock, isc_rwlocktype_write);
4881                         locktype = isc_rwlocktype_write;
4882                         POST(locktype);
4883                 }
4884                 if (need_headerupdate(found, search.now))
4885                         update_header(search.rbtdb, found, search.now);
4886                 if (foundsig != NULL &&
4887                     need_headerupdate(foundsig, search.now)) {
4888                         update_header(search.rbtdb, foundsig, search.now);
4889                 }
4890         }
4891
4892         NODE_UNLOCK(lock, locktype);
4893
4894  tree_exit:
4895         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4896
             /*
              * Nothing in this function sets need_cleanup, so there is
              * never a zone-cut reference to release here (contrast with
              * the cleanup at the end of cache_find()).
              */
4897         INSIST(!search.need_cleanup);
4898
4899         dns_rbtnodechain_reset(&search.chain);
4900
             /*
              * Finding a delegation is the successful outcome of this
              * lookup.
              */
4901         if (result == DNS_R_DELEGATION)
4902                 result = ISC_R_SUCCESS;
4903
4904         return (result);
4905 }
4906
4907 static void
4908 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
4909         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4910         dns_rbtnode_t *node = (dns_rbtnode_t *)source;
4911         unsigned int refs;
4912
4913         REQUIRE(VALID_RBTDB(rbtdb));
4914         REQUIRE(targetp != NULL && *targetp == NULL);
4915
4916         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
4917         dns_rbtnode_refincrement(node, &refs);
4918         INSIST(refs != 0);
4919         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
4920
4921         *targetp = source;
4922 }
4923
4924 static void
4925 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
4926         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4927         dns_rbtnode_t *node;
4928         isc_boolean_t want_free = ISC_FALSE;
4929         isc_boolean_t inactive = ISC_FALSE;
4930         rbtdb_nodelock_t *nodelock;
4931
4932         REQUIRE(VALID_RBTDB(rbtdb));
4933         REQUIRE(targetp != NULL && *targetp != NULL);
4934
4935         node = (dns_rbtnode_t *)(*targetp);
4936         nodelock = &rbtdb->node_locks[node->locknum];
4937
4938         NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
4939
4940         if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
4941                                 isc_rwlocktype_none, ISC_FALSE)) {
4942                 if (isc_refcount_current(&nodelock->references) == 0 &&
4943                     nodelock->exiting) {
4944                         inactive = ISC_TRUE;
4945                 }
4946         }
4947
4948         NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
4949
4950         *targetp = NULL;
4951
4952         if (inactive) {
4953                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
4954                 rbtdb->active--;
4955                 if (rbtdb->active == 0)
4956                         want_free = ISC_TRUE;
4957                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
4958                 if (want_free) {
4959                         char buf[DNS_NAME_FORMATSIZE];
4960                         if (dns_name_dynamic(&rbtdb->common.origin))
4961                                 dns_name_format(&rbtdb->common.origin, buf,
4962                                                 sizeof(buf));
4963                         else
4964                                 strcpy(buf, "<UNKNOWN>");
4965                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
4966                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
4967                                       "calling free_rbtdb(%s)", buf);
4968                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
4969                 }
4970         }
4971 }
4972
4973 static isc_result_t
4974 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
4975         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4976         dns_rbtnode_t *rbtnode = node;
4977         rdatasetheader_t *header;
4978         isc_boolean_t force_expire = ISC_FALSE;
4979         /*
4980          * These are the category and module used by the cache cleaner.
4981          */
4982         isc_boolean_t log = ISC_FALSE;
4983         isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
4984         isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
4985         int level = ISC_LOG_DEBUG(2);
4986         char printname[DNS_NAME_FORMATSIZE];
4987
4988         REQUIRE(VALID_RBTDB(rbtdb));
4989
4990         /*
4991          * Caller must hold a tree lock.
4992          */
4993
4994         if (now == 0)
4995                 isc_stdtime_get(&now);
4996
4997         if (isc_mem_isovermem(rbtdb->common.mctx)) {
4998                 isc_uint32_t val;
4999
5000                 isc_random_get(&val);
5001                 /*
5002                  * XXXDCL Could stand to have a better policy, like LRU.
5003                  */
5004                 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
5005
5006                 /*
5007                  * Note that 'log' can be true IFF overmem is also true.
5008                  * overmem can currently only be true for cache
5009                  * databases -- hence all of the "overmem cache" log strings.
5010                  */
5011                 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
5012                 if (log)
5013                         isc_log_write(dns_lctx, category, module, level,
5014                                       "overmem cache: %s %s",
5015                                       force_expire ? "FORCE" : "check",
5016                                       dns_rbt_formatnodename(rbtnode,
5017                                                            printname,
5018                                                            sizeof(printname)));
5019         }
5020
5021         /*
5022          * We may not need write access, but this code path is not performance
5023          * sensitive, so it should be okay to always lock as a writer.
5024          */
5025         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5026                   isc_rwlocktype_write);
5027
5028         for (header = rbtnode->data; header != NULL; header = header->next)
5029                 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
5030                         /*
5031                          * We don't check if refcurrent(rbtnode) == 0 and try
5032                          * to free like we do in cache_find(), because
5033                          * refcurrent(rbtnode) must be non-zero.  This is so
5034                          * because 'node' is an argument to the function.
5035                          */
5036                         header->attributes |= RDATASET_ATTR_STALE;
5037                         rbtnode->dirty = 1;
5038                         if (log)
5039                                 isc_log_write(dns_lctx, category, module,
5040                                               level, "overmem cache: stale %s",
5041                                               printname);
5042                 } else if (force_expire) {
5043                         if (! RETAIN(header)) {
5044                                 set_ttl(rbtdb, header, 0);
5045                                 header->attributes |= RDATASET_ATTR_STALE;
5046                                 rbtnode->dirty = 1;
5047                         } else if (log) {
5048                                 isc_log_write(dns_lctx, category, module,
5049                                               level, "overmem cache: "
5050                                               "reprieve by RETAIN() %s",
5051                                               printname);
5052                         }
5053                 } else if (isc_mem_isovermem(rbtdb->common.mctx) && log)
5054                         isc_log_write(dns_lctx, category, module, level,
5055                                       "overmem cache: saved %s", printname);
5056
5057         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5058                     isc_rwlocktype_write);
5059
5060         return (ISC_R_SUCCESS);
5061 }
5062
5063 static void
5064 overmem(dns_db_t *db, isc_boolean_t overmem) {
5065         /* This is an empty callback.  See adb.c:water() */
5066
5067         UNUSED(db);
5068         UNUSED(overmem);
5069
5070         return;
5071 }
5072
5073 static void
5074 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5075         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5076         dns_rbtnode_t *rbtnode = node;
5077         isc_boolean_t first;
5078
5079         REQUIRE(VALID_RBTDB(rbtdb));
5080
5081         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5082                   isc_rwlocktype_read);
5083
5084         fprintf(out, "node %p, %u references, locknum = %u\n",
5085                 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5086                 rbtnode->locknum);
5087         if (rbtnode->data != NULL) {
5088                 rdatasetheader_t *current, *top_next;
5089
5090                 for (current = rbtnode->data; current != NULL;
5091                      current = top_next) {
5092                         top_next = current->next;
5093                         first = ISC_TRUE;
5094                         fprintf(out, "\ttype %u", current->type);
5095                         do {
5096                                 if (!first)
5097                                         fprintf(out, "\t");
5098                                 first = ISC_FALSE;
5099                                 fprintf(out,
5100                                         "\tserial = %lu, ttl = %u, "
5101                                         "trust = %u, attributes = %u, "
5102                                         "resign = %u\n",
5103                                         (unsigned long)current->serial,
5104                                         current->rdh_ttl,
5105                                         current->trust,
5106                                         current->attributes,
5107                                         current->resign);
5108                                 current = current->down;
5109                         } while (current != NULL);
5110                 }
5111         } else
5112                 fprintf(out, "(empty)\n");
5113
5114         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5115                     isc_rwlocktype_read);
5116 }
5117
5118 static isc_result_t
5119 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5120 {
5121         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5122         rbtdb_dbiterator_t *rbtdbiter;
5123
5124         REQUIRE(VALID_RBTDB(rbtdb));
5125
5126         rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5127         if (rbtdbiter == NULL)
5128                 return (ISC_R_NOMEMORY);
5129
5130         rbtdbiter->common.methods = &dbiterator_methods;
5131         rbtdbiter->common.db = NULL;
5132         dns_db_attach(db, &rbtdbiter->common.db);
5133         rbtdbiter->common.relative_names =
5134                         ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5135         rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5136         rbtdbiter->common.cleaning = ISC_FALSE;
5137         rbtdbiter->paused = ISC_TRUE;
5138         rbtdbiter->tree_locked = isc_rwlocktype_none;
5139         rbtdbiter->result = ISC_R_SUCCESS;
5140         dns_fixedname_init(&rbtdbiter->name);
5141         dns_fixedname_init(&rbtdbiter->origin);
5142         rbtdbiter->node = NULL;
5143         rbtdbiter->delete = 0;
5144         rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5145         rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5146         memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5147         dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5148         dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5149         if (rbtdbiter->nsec3only)
5150                 rbtdbiter->current = &rbtdbiter->nsec3chain;
5151         else
5152                 rbtdbiter->current = &rbtdbiter->chain;
5153
5154         *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5155
5156         return (ISC_R_SUCCESS);
5157 }
5158
5159 static isc_result_t
5160 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5161                   dns_rdatatype_t type, dns_rdatatype_t covers,
5162                   isc_stdtime_t now, dns_rdataset_t *rdataset,
5163                   dns_rdataset_t *sigrdataset)
5164 {
5165         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5166         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5167         rdatasetheader_t *header, *header_next, *found, *foundsig;
5168         rbtdb_serial_t serial;
5169         rbtdb_version_t *rbtversion = version;
5170         isc_boolean_t close_version = ISC_FALSE;
5171         rbtdb_rdatatype_t matchtype, sigmatchtype;
5172
5173         REQUIRE(VALID_RBTDB(rbtdb));
5174         REQUIRE(type != dns_rdatatype_any);
5175
5176         if (rbtversion == NULL) {
5177                 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5178                 close_version = ISC_TRUE;
5179         }
5180         serial = rbtversion->serial;
5181         now = 0;
5182
5183         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5184                   isc_rwlocktype_read);
5185
5186         found = NULL;
5187         foundsig = NULL;
5188         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5189         if (covers == 0)
5190                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5191         else
5192                 sigmatchtype = 0;
5193
5194         for (header = rbtnode->data; header != NULL; header = header_next) {
5195                 header_next = header->next;
5196                 do {
5197                         if (header->serial <= serial &&
5198                             !IGNORE(header)) {
5199                                 /*
5200                                  * Is this a "this rdataset doesn't
5201                                  * exist" record?
5202                                  */
5203                                 if (NONEXISTENT(header))
5204                                         header = NULL;
5205                                 break;
5206                         } else
5207                                 header = header->down;
5208                 } while (header != NULL);
5209                 if (header != NULL) {
5210                         /*
5211                          * We have an active, extant rdataset.  If it's a
5212                          * type we're looking for, remember it.
5213                          */
5214                         if (header->type == matchtype) {
5215                                 found = header;
5216                                 if (foundsig != NULL)
5217                                         break;
5218                         } else if (header->type == sigmatchtype) {
5219                                 foundsig = header;
5220                                 if (found != NULL)
5221                                         break;
5222                         }
5223                 }
5224         }
5225         if (found != NULL) {
5226                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5227                 if (foundsig != NULL)
5228                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5229                                       sigrdataset);
5230         }
5231
5232         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5233                     isc_rwlocktype_read);
5234
5235         if (close_version)
5236                 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5237                              ISC_FALSE);
5238
5239         if (found == NULL)
5240                 return (ISC_R_NOTFOUND);
5241
5242         return (ISC_R_SUCCESS);
5243 }
5244
5245 static isc_result_t
5246 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5247                    dns_rdatatype_t type, dns_rdatatype_t covers,
5248                    isc_stdtime_t now, dns_rdataset_t *rdataset,
5249                    dns_rdataset_t *sigrdataset)
5250 {
5251         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5252         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5253         rdatasetheader_t *header, *header_next, *found, *foundsig;
5254         rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5255         isc_result_t result;
5256         nodelock_t *lock;
5257         isc_rwlocktype_t locktype;
5258
5259         REQUIRE(VALID_RBTDB(rbtdb));
5260         REQUIRE(type != dns_rdatatype_any);
5261
5262         UNUSED(version);
5263
5264         result = ISC_R_SUCCESS;
5265
5266         if (now == 0)
5267                 isc_stdtime_get(&now);
5268
5269         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5270         locktype = isc_rwlocktype_read;
5271         NODE_LOCK(lock, locktype);
5272
5273         found = NULL;
5274         foundsig = NULL;
5275         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5276         negtype = RBTDB_RDATATYPE_VALUE(0, type);
5277         if (covers == 0)
5278                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5279         else
5280                 sigmatchtype = 0;
5281
5282         for (header = rbtnode->data; header != NULL; header = header_next) {
5283                 header_next = header->next;
5284                 if (header->rdh_ttl <= now) {
5285                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5286                             (locktype == isc_rwlocktype_write ||
5287                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5288                                 /*
5289                                  * We update the node's status only when we
5290                                  * can get write access.
5291                                  */
5292                                 locktype = isc_rwlocktype_write;
5293
5294                                 /*
5295                                  * We don't check if refcurrent(rbtnode) == 0
5296                                  * and try to free like we do in cache_find(),
5297                                  * because refcurrent(rbtnode) must be
5298                                  * non-zero.  This is so because 'node' is an
5299                                  * argument to the function.
5300                                  */
5301                                 header->attributes |= RDATASET_ATTR_STALE;
5302                                 rbtnode->dirty = 1;
5303                         }
5304                 } else if (EXISTS(header)) {
5305                         if (header->type == matchtype)
5306                                 found = header;
5307                         else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5308                                  header->type == negtype)
5309                                 found = header;
5310                         else if (header->type == sigmatchtype)
5311                                 foundsig = header;
5312                 }
5313         }
5314         if (found != NULL) {
5315                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5316                 if (foundsig != NULL)
5317                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5318                                       sigrdataset);
5319         }
5320
5321         NODE_UNLOCK(lock, locktype);
5322
5323         if (found == NULL)
5324                 return (ISC_R_NOTFOUND);
5325
5326         if (NEGATIVE(found)) {
5327                 /*
5328                  * We found a negative cache entry.
5329                  */
5330                 if (NXDOMAIN(found))
5331                         result = DNS_R_NCACHENXDOMAIN;
5332                 else
5333                         result = DNS_R_NCACHENXRRSET;
5334         }
5335
5336         return (result);
5337 }
5338
5339 static isc_result_t
5340 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5341              isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5342 {
5343         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5344         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5345         rbtdb_version_t *rbtversion = version;
5346         rbtdb_rdatasetiter_t *iterator;
5347         unsigned int refs;
5348
5349         REQUIRE(VALID_RBTDB(rbtdb));
5350
5351         iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5352         if (iterator == NULL)
5353                 return (ISC_R_NOMEMORY);
5354
5355         if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5356                 now = 0;
5357                 if (rbtversion == NULL)
5358                         currentversion(db,
5359                                  (dns_dbversion_t **) (void *)(&rbtversion));
5360                 else {
5361                         unsigned int refs;
5362
5363                         isc_refcount_increment(&rbtversion->references,
5364                                                &refs);
5365                         INSIST(refs > 1);
5366                 }
5367         } else {
5368                 if (now == 0)
5369                         isc_stdtime_get(&now);
5370                 rbtversion = NULL;
5371         }
5372
5373         iterator->common.magic = DNS_RDATASETITER_MAGIC;
5374         iterator->common.methods = &rdatasetiter_methods;
5375         iterator->common.db = db;
5376         iterator->common.node = node;
5377         iterator->common.version = (dns_dbversion_t *)rbtversion;
5378         iterator->common.now = now;
5379
5380         NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5381
5382         dns_rbtnode_refincrement(rbtnode, &refs);
5383         INSIST(refs != 0);
5384
5385         iterator->current = NULL;
5386
5387         NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5388
5389         *iteratorp = (dns_rdatasetiter_t *)iterator;
5390
5391         return (ISC_R_SUCCESS);
5392 }
5393
5394 static isc_boolean_t
5395 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5396         rdatasetheader_t *header, *header_next;
5397         isc_boolean_t cname, other_data;
5398         dns_rdatatype_t rdtype;
5399
5400         /*
5401          * The caller must hold the node lock.
5402          */
5403
5404         /*
5405          * Look for CNAME and "other data" rdatasets active in our version.
5406          */
5407         cname = ISC_FALSE;
5408         other_data = ISC_FALSE;
5409         for (header = node->data; header != NULL; header = header_next) {
5410                 header_next = header->next;
5411                 if (header->type == dns_rdatatype_cname) {
5412                         /*
5413                          * Look for an active extant CNAME.
5414                          */
5415                         do {
5416                                 if (header->serial <= serial &&
5417                                     !IGNORE(header)) {
5418                                         /*
5419                                          * Is this a "this rdataset doesn't
5420                                          * exist" record?
5421                                          */
5422                                         if (NONEXISTENT(header))
5423                                                 header = NULL;
5424                                         break;
5425                                 } else
5426                                         header = header->down;
5427                         } while (header != NULL);
5428                         if (header != NULL)
5429                                 cname = ISC_TRUE;
5430                 } else {
5431                         /*
5432                          * Look for active extant "other data".
5433                          *
5434                          * "Other data" is any rdataset whose type is not
5435                          * KEY, NSEC, SIG or RRSIG.
5436                          */
5437                         rdtype = RBTDB_RDATATYPE_BASE(header->type);
5438                         if (rdtype != dns_rdatatype_key &&
5439                             rdtype != dns_rdatatype_sig &&
5440                             rdtype != dns_rdatatype_nsec &&
5441                             rdtype != dns_rdatatype_rrsig) {
5442                                 /*
5443                                  * Is it active and extant?
5444                                  */
5445                                 do {
5446                                         if (header->serial <= serial &&
5447                                             !IGNORE(header)) {
5448                                                 /*
5449                                                  * Is this a "this rdataset
5450                                                  * doesn't exist" record?
5451                                                  */
5452                                                 if (NONEXISTENT(header))
5453                                                         header = NULL;
5454                                                 break;
5455                                         } else
5456                                                 header = header->down;
5457                                 } while (header != NULL);
5458                                 if (header != NULL)
5459                                         other_data = ISC_TRUE;
5460                         }
5461                 }
5462         }
5463
5464         if (cname && other_data)
5465                 return (ISC_TRUE);
5466
5467         return (ISC_FALSE);
5468 }
5469
5470 static isc_result_t
5471 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5472         isc_result_t result;
5473
5474         INSIST(!IS_CACHE(rbtdb));
5475         INSIST(newheader->heap_index == 0);
5476         INSIST(!ISC_LINK_LINKED(newheader, link));
5477
5478         result = isc_heap_insert(rbtdb->heaps[idx], newheader);
5479         return (result);
5480 }
5481
5482 static isc_result_t
5483 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5484     rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5485     dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5486 {
5487         rbtdb_changed_t *changed = NULL;
5488         rdatasetheader_t *topheader, *topheader_prev, *header, *sigheader;
5489         unsigned char *merged;
5490         isc_result_t result;
5491         isc_boolean_t header_nx;
5492         isc_boolean_t newheader_nx;
5493         isc_boolean_t merge;
5494         dns_rdatatype_t rdtype, covers;
5495         rbtdb_rdatatype_t negtype, sigtype;
5496         dns_trust_t trust;
5497         int idx;
5498
5499         /*
5500          * Add an rdatasetheader_t to a node.
5501          */
5502
5503         /*
5504          * Caller must be holding the node lock.
5505          */
5506
5507         if ((options & DNS_DBADD_MERGE) != 0) {
5508                 REQUIRE(rbtversion != NULL);
5509                 merge = ISC_TRUE;
5510         } else
5511                 merge = ISC_FALSE;
5512
5513         if ((options & DNS_DBADD_FORCE) != 0)
5514                 trust = dns_trust_ultimate;
5515         else
5516                 trust = newheader->trust;
5517
5518         if (rbtversion != NULL && !loading) {
5519                 /*
5520                  * We always add a changed record, even if no changes end up
5521                  * being made to this node, because it's harmless and
5522                  * simplifies the code.
5523                  */
5524                 changed = add_changed(rbtdb, rbtversion, rbtnode);
5525                 if (changed == NULL) {
5526                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5527                         return (ISC_R_NOMEMORY);
5528                 }
5529         }
5530
5531         newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5532         topheader_prev = NULL;
5533         sigheader = NULL;
5534         negtype = 0;
5535         if (rbtversion == NULL && !newheader_nx) {
5536                 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5537                 if (NEGATIVE(newheader)) {
5538                         /*
5539                          * We're adding a negative cache entry.
5540                          */
5541                         covers = RBTDB_RDATATYPE_EXT(newheader->type);
5542                         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
5543                                                         covers);
5544                         for (topheader = rbtnode->data;
5545                              topheader != NULL;
5546                              topheader = topheader->next) {
5547                                 /*
5548                                  * If we're adding an negative cache entry
5549                                  * which covers all types (NXDOMAIN,
5550                                  * NODATA(QTYPE=ANY)).
5551                                  *
5552                                  * We make all other data stale so that the
5553                                  * only rdataset that can be found at this
5554                                  * node is the negative cache entry.
5555                                  *
5556                                  * Otherwise look for any RRSIGs of the
5557                                  * given type so they can be marked stale
5558                                  * later.
5559                                  */
5560                                 if (covers == dns_rdatatype_any) {
5561                                         set_ttl(rbtdb, topheader, 0);
5562                                         topheader->attributes |=
5563                                                 RDATASET_ATTR_STALE;
5564                                         rbtnode->dirty = 1;
5565                                 } else if (topheader->type == sigtype)
5566                                         sigheader = topheader;
5567                         }
5568                         if (covers == dns_rdatatype_any)
5569                                 goto find_header;
5570                         negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5571                 } else {
5572                         /*
5573                          * We're adding something that isn't a
5574                          * negative cache entry.  Look for an extant
5575                          * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5576                          * cache entry.
5577                          */
5578                         for (topheader = rbtnode->data;
5579                              topheader != NULL;
5580                              topheader = topheader->next) {
5581                                 if (topheader->type ==
5582                                     RBTDB_RDATATYPE_NCACHEANY)
5583                                         break;
5584                         }
5585                         if (topheader != NULL && EXISTS(topheader) &&
5586                             topheader->rdh_ttl > now) {
5587                                 /*
5588                                  * Found one.
5589                                  */
5590                                 if (trust < topheader->trust) {
5591                                         /*
5592                                          * The NXDOMAIN/NODATA(QTYPE=ANY)
5593                                          * is more trusted.
5594                                          */
5595                                         free_rdataset(rbtdb,
5596                                                       rbtdb->common.mctx,
5597                                                       newheader);
5598                                         if (addedrdataset != NULL)
5599                                                 bind_rdataset(rbtdb, rbtnode,
5600                                                               topheader, now,
5601                                                               addedrdataset);
5602                                         return (DNS_R_UNCHANGED);
5603                                 }
5604                                 /*
5605                                  * The new rdataset is better.  Expire the
5606                                  * NXDOMAIN/NODATA(QTYPE=ANY).
5607                                  */
5608                                 set_ttl(rbtdb, topheader, 0);
5609                                 topheader->attributes |= RDATASET_ATTR_STALE;
5610                                 rbtnode->dirty = 1;
5611                                 topheader = NULL;
5612                                 goto find_header;
5613                         }
5614                         negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5615                 }
5616         }
5617
5618         for (topheader = rbtnode->data;
5619              topheader != NULL;
5620              topheader = topheader->next) {
5621                 if (topheader->type == newheader->type ||
5622                     topheader->type == negtype)
5623                         break;
5624                 topheader_prev = topheader;
5625         }
5626
5627  find_header:
5628         /*
5629          * If header isn't NULL, we've found the right type.  There may be
5630          * IGNORE rdatasets between the top of the chain and the first real
5631          * data.  We skip over them.
5632          */
5633         header = topheader;
5634         while (header != NULL && IGNORE(header))
5635                 header = header->down;
5636         if (header != NULL) {
5637                 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
5638
5639                 /*
5640                  * Deleting an already non-existent rdataset has no effect.
5641                  */
5642                 if (header_nx && newheader_nx) {
5643                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5644                         return (DNS_R_UNCHANGED);
5645                 }
5646
5647                 /*
5648                  * Trying to add an rdataset with lower trust to a cache DB
5649                  * has no effect, provided that the cache data isn't stale.
5650                  */
5651                 if (rbtversion == NULL && trust < header->trust &&
5652                     (header->rdh_ttl > now || header_nx)) {
5653                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5654                         if (addedrdataset != NULL)
5655                                 bind_rdataset(rbtdb, rbtnode, header, now,
5656                                               addedrdataset);
5657                         return (DNS_R_UNCHANGED);
5658                 }
5659
5660                 /*
5661                  * Don't merge if a nonexistent rdataset is involved.
5662                  */
5663                 if (merge && (header_nx || newheader_nx))
5664                         merge = ISC_FALSE;
5665
5666                 /*
5667                  * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
5668                  * that is the union of 'newheader' and 'header'.
5669                  */
5670                 if (merge) {
5671                         unsigned int flags = 0;
5672                         INSIST(rbtversion->serial >= header->serial);
5673                         merged = NULL;
5674                         result = ISC_R_SUCCESS;
5675
5676                         if ((options & DNS_DBADD_EXACT) != 0)
5677                                 flags |= DNS_RDATASLAB_EXACT;
5678                         if ((options & DNS_DBADD_EXACTTTL) != 0 &&
5679                              newheader->rdh_ttl != header->rdh_ttl)
5680                                         result = DNS_R_NOTEXACT;
5681                         else if (newheader->rdh_ttl != header->rdh_ttl)
5682                                 flags |= DNS_RDATASLAB_FORCE;
5683                         if (result == ISC_R_SUCCESS)
5684                                 result = dns_rdataslab_merge(
5685                                              (unsigned char *)header,
5686                                              (unsigned char *)newheader,
5687                                              (unsigned int)(sizeof(*newheader)),
5688                                              rbtdb->common.mctx,
5689                                              rbtdb->common.rdclass,
5690                                              (dns_rdatatype_t)header->type,
5691                                              flags, &merged);
5692                         if (result == ISC_R_SUCCESS) {
5693                                 /*
5694                                  * If 'header' has the same serial number as
5695                                  * we do, we could clean it up now if we knew
5696                                  * that our caller had no references to it.
5697                                  * We don't know this, however, so we leave it
5698                                  * alone.  It will get cleaned up when
5699                                  * clean_zone_node() runs.
5700                                  */
5701                                 free_rdataset(rbtdb, rbtdb->common.mctx,
5702                                               newheader);
5703                                 newheader = (rdatasetheader_t *)merged;
5704                                 if (loading && RESIGN(newheader) &&
5705                                     RESIGN(header) &&
5706                                     header->resign < newheader->resign)
5707                                         newheader->resign = header->resign;
5708                         } else {
5709                                 free_rdataset(rbtdb, rbtdb->common.mctx,
5710                                               newheader);
5711                                 return (result);
5712                         }
5713                 }
5714                 /*
5715                  * Don't replace existing NS, A and AAAA RRsets
5716                  * in the cache if they are already exist.  This
5717                  * prevents named being locked to old servers.
5718                  * Don't lower trust of existing record if the
5719                  * update is forced.
5720                  */
5721                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5722                     header->type == dns_rdatatype_ns &&
5723                     !header_nx && !newheader_nx &&
5724                     header->trust >= newheader->trust &&
5725                     dns_rdataslab_equalx((unsigned char *)header,
5726                                          (unsigned char *)newheader,
5727                                          (unsigned int)(sizeof(*newheader)),
5728                                          rbtdb->common.rdclass,
5729                                          (dns_rdatatype_t)header->type)) {
5730                         /*
5731                          * Honour the new ttl if it is less than the
5732                          * older one.
5733                          */
5734                         if (header->rdh_ttl > newheader->rdh_ttl)
5735                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
5736                         if (header->noqname == NULL &&
5737                             newheader->noqname != NULL) {
5738                                 header->noqname = newheader->noqname;
5739                                 newheader->noqname = NULL;
5740                         }
5741                         if (header->closest == NULL &&
5742                             newheader->closest != NULL) {
5743                                 header->closest = newheader->closest;
5744                                 newheader->closest = NULL;
5745                         }
5746                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5747                         if (addedrdataset != NULL)
5748                                 bind_rdataset(rbtdb, rbtnode, header, now,
5749                                               addedrdataset);
5750                         return (ISC_R_SUCCESS);
5751                 }
5752                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5753                     (header->type == dns_rdatatype_a ||
5754                      header->type == dns_rdatatype_aaaa) &&
5755                     !header_nx && !newheader_nx &&
5756                     header->trust >= newheader->trust &&
5757                     dns_rdataslab_equal((unsigned char *)header,
5758                                         (unsigned char *)newheader,
5759                                         (unsigned int)(sizeof(*newheader)))) {
5760                         /*
5761                          * Honour the new ttl if it is less than the
5762                          * older one.
5763                          */
5764                         if (header->rdh_ttl > newheader->rdh_ttl)
5765                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
5766                         if (header->noqname == NULL &&
5767                             newheader->noqname != NULL) {
5768                                 header->noqname = newheader->noqname;
5769                                 newheader->noqname = NULL;
5770                         }
5771                         if (header->closest == NULL &&
5772                             newheader->closest != NULL) {
5773                                 header->closest = newheader->closest;
5774                                 newheader->closest = NULL;
5775                         }
5776                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5777                         if (addedrdataset != NULL)
5778                                 bind_rdataset(rbtdb, rbtnode, header, now,
5779                                               addedrdataset);
5780                         return (ISC_R_SUCCESS);
5781                 }
5782                 INSIST(rbtversion == NULL ||
5783                        rbtversion->serial >= topheader->serial);
5784                 if (topheader_prev != NULL)
5785                         topheader_prev->next = newheader;
5786                 else
5787                         rbtnode->data = newheader;
5788                 newheader->next = topheader->next;
5789                 if (loading) {
5790                         /*
5791                          * There are no other references to 'header' when
5792                          * loading, so we MAY clean up 'header' now.
5793                          * Since we don't generate changed records when
5794                          * loading, we MUST clean up 'header' now.
5795                          */
5796                         newheader->down = NULL;
5797                         free_rdataset(rbtdb, rbtdb->common.mctx, header);
5798                 } else {
5799                         newheader->down = topheader;
5800                         topheader->next = newheader;
5801                         rbtnode->dirty = 1;
5802                         if (changed != NULL)
5803                                 changed->dirty = ISC_TRUE;
5804                         if (rbtversion == NULL) {
5805                                 set_ttl(rbtdb, header, 0);
5806                                 header->attributes |= RDATASET_ATTR_STALE;
5807                                 if (sigheader != NULL) {
5808                                         set_ttl(rbtdb, sigheader, 0);
5809                                         sigheader->attributes |=
5810                                                  RDATASET_ATTR_STALE;
5811                                 }
5812                         }
5813                         idx = newheader->node->locknum;
5814                         if (IS_CACHE(rbtdb)) {
5815                                 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5816                                                  newheader, link);
5817                                 /*
5818                                  * XXXMLG We don't check the return value
5819                                  * here.  If it fails, we will not do TTL
5820                                  * based expiry on this node.  However, we
5821                                  * will do it on the LRU side, so memory
5822                                  * will not leak... for long.
5823                                  */
5824                                 isc_heap_insert(rbtdb->heaps[idx], newheader);
5825                         } else if (RESIGN(newheader))
5826                                 resign_insert(rbtdb, idx, newheader);
5827                 }
5828         } else {
5829                 /*
5830                  * No non-IGNORED rdatasets of the given type exist at
5831                  * this node.
5832                  */
5833
5834                 /*
5835                  * If we're trying to delete the type, don't bother.
5836                  */
5837                 if (newheader_nx) {
5838                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5839                         return (DNS_R_UNCHANGED);
5840                 }
5841
5842                 if (topheader != NULL) {
5843                         /*
5844                          * We have an list of rdatasets of the given type,
5845                          * but they're all marked IGNORE.  We simply insert
5846                          * the new rdataset at the head of the list.
5847                          *
5848                          * Ignored rdatasets cannot occur during loading, so
5849                          * we INSIST on it.
5850                          */
5851                         INSIST(!loading);
5852                         INSIST(rbtversion == NULL ||
5853                                rbtversion->serial >= topheader->serial);
5854                         if (topheader_prev != NULL)
5855                                 topheader_prev->next = newheader;
5856                         else
5857                                 rbtnode->data = newheader;
5858                         newheader->next = topheader->next;
5859                         newheader->down = topheader;
5860                         topheader->next = newheader;
5861                         rbtnode->dirty = 1;
5862                         if (changed != NULL)
5863                                 changed->dirty = ISC_TRUE;
5864                 } else {
5865                         /*
5866                          * No rdatasets of the given type exist at the node.
5867                          */
5868                         newheader->next = rbtnode->data;
5869                         newheader->down = NULL;
5870                         rbtnode->data = newheader;
5871                 }
5872                 idx = newheader->node->locknum;
5873                 if (IS_CACHE(rbtdb)) {
5874                         ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5875                                          newheader, link);
5876                         isc_heap_insert(rbtdb->heaps[idx], newheader);
5877                 } else if (RESIGN(newheader)) {
5878                         resign_insert(rbtdb, idx, newheader);
5879                 }
5880         }
5881
5882         /*
5883          * Check if the node now contains CNAME and other data.
5884          */
5885         if (rbtversion != NULL &&
5886             cname_and_other_data(rbtnode, rbtversion->serial))
5887                 return (DNS_R_CNAMEANDOTHER);
5888
5889         if (addedrdataset != NULL)
5890                 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
5891
5892         return (ISC_R_SUCCESS);
5893 }
5894
5895 static inline isc_boolean_t
5896 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
5897                 rbtdb_rdatatype_t type)
5898 {
5899         if (IS_CACHE(rbtdb)) {
5900                 if (type == dns_rdatatype_dname)
5901                         return (ISC_TRUE);
5902                 else
5903                         return (ISC_FALSE);
5904         } else if (type == dns_rdatatype_dname ||
5905                    (type == dns_rdatatype_ns &&
5906                     (node != rbtdb->origin_node || IS_STUB(rbtdb))))
5907                 return (ISC_TRUE);
5908         return (ISC_FALSE);
5909 }
5910
5911 static inline isc_result_t
5912 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5913            dns_rdataset_t *rdataset)
5914 {
5915         struct noqname *noqname;
5916         isc_mem_t *mctx = rbtdb->common.mctx;
5917         dns_name_t name;
5918         dns_rdataset_t neg, negsig;
5919         isc_result_t result;
5920         isc_region_t r;
5921
5922         dns_name_init(&name, NULL);
5923         dns_rdataset_init(&neg);
5924         dns_rdataset_init(&negsig);
5925
5926         result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
5927         RUNTIME_CHECK(result == ISC_R_SUCCESS);
5928
5929         noqname = isc_mem_get(mctx, sizeof(*noqname));
5930         if (noqname == NULL) {
5931                 result = ISC_R_NOMEMORY;
5932                 goto cleanup;
5933         }
5934         dns_name_init(&noqname->name, NULL);
5935         noqname->neg = NULL;
5936         noqname->negsig = NULL;
5937         noqname->type = neg.type;
5938         result = dns_name_dup(&name, mctx, &noqname->name);
5939         if (result != ISC_R_SUCCESS)
5940                 goto cleanup;
5941         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5942         if (result != ISC_R_SUCCESS)
5943                 goto cleanup;
5944         noqname->neg = r.base;
5945         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5946         if (result != ISC_R_SUCCESS)
5947                 goto cleanup;
5948         noqname->negsig = r.base;
5949         dns_rdataset_disassociate(&neg);
5950         dns_rdataset_disassociate(&negsig);
5951         newheader->noqname = noqname;
5952         return (ISC_R_SUCCESS);
5953
5954 cleanup:
5955         dns_rdataset_disassociate(&neg);
5956         dns_rdataset_disassociate(&negsig);
5957         free_noqname(mctx, &noqname);
5958         return(result);
5959 }
5960
5961 static inline isc_result_t
5962 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5963            dns_rdataset_t *rdataset)
5964 {
5965         struct noqname *closest;
5966         isc_mem_t *mctx = rbtdb->common.mctx;
5967         dns_name_t name;
5968         dns_rdataset_t neg, negsig;
5969         isc_result_t result;
5970         isc_region_t r;
5971
5972         dns_name_init(&name, NULL);
5973         dns_rdataset_init(&neg);
5974         dns_rdataset_init(&negsig);
5975
5976         result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
5977         RUNTIME_CHECK(result == ISC_R_SUCCESS);
5978
5979         closest = isc_mem_get(mctx, sizeof(*closest));
5980         if (closest == NULL) {
5981                 result = ISC_R_NOMEMORY;
5982                 goto cleanup;
5983         }
5984         dns_name_init(&closest->name, NULL);
5985         closest->neg = NULL;
5986         closest->negsig = NULL;
5987         closest->type = neg.type;
5988         result = dns_name_dup(&name, mctx, &closest->name);
5989         if (result != ISC_R_SUCCESS)
5990                 goto cleanup;
5991         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5992         if (result != ISC_R_SUCCESS)
5993                 goto cleanup;
5994         closest->neg = r.base;
5995         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5996         if (result != ISC_R_SUCCESS)
5997                 goto cleanup;
5998         closest->negsig = r.base;
5999         dns_rdataset_disassociate(&neg);
6000         dns_rdataset_disassociate(&negsig);
6001         newheader->closest = closest;
6002         return (ISC_R_SUCCESS);
6003
6004  cleanup:
6005         dns_rdataset_disassociate(&neg);
6006         dns_rdataset_disassociate(&negsig);
6007         free_noqname(mctx, &closest);
6008         return(result);
6009 }
6010
/*
 * Declared ahead of use: addrdataset() and subtractrdataset() compare
 * rbtdb->common.methods against &zone_methods to recognise a zone
 * database before applying their NSEC3 node checks.
 * NOTE(review): the initialised definition is presumably later in this
 * file -- confirm.
 */
6011 static dns_dbmethods_t zone_methods;
6012
/*
 * Add 'rdataset' to 'node' in database 'db'.
 *
 * The rdataset is converted to a slab with room for an rdatasetheader_t
 * in front of it, the header is filled in from 'rdataset', and the result
 * is handed to add() under the node's write lock.
 *
 * 'version'       If non-NULL, the header takes the version's serial and
 *                 the RESIGN attribute/time from 'rdataset', and 'now' is
 *                 forced to 0.  If NULL, the header gets serial 1, the
 *                 NEGATIVE / NXDOMAIN / OPTOUT attributes are copied and
 *                 any NOQNAME / CLOSEST proofs are attached.
 * 'now'           Only used when 'version' is NULL; 0 means "use the
 *                 current time".  The stored TTL is rdataset->ttl + now.
 * 'options'       Passed through to add().
 * 'addedrdataset' Passed through to add().
 *
 * Returns the failure from dns_rdataslab_fromrdataset(), addnoqname() or
 * addclosest() if one of them fails, otherwise whatever add() returns.
 */
6013 static isc_result_t
6014 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6015             isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
6016             dns_rdataset_t *addedrdataset)
6017 {
6018         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6019         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6020         rbtdb_version_t *rbtversion = version;
6021         isc_region_t region;
6022         rdatasetheader_t *newheader;
6023         rdatasetheader_t *header;
6024         isc_result_t result;
6025         isc_boolean_t delegating;
6026         isc_boolean_t tree_locked = ISC_FALSE;
6027         isc_boolean_t cache_is_overmem = ISC_FALSE;
6028
6029         REQUIRE(VALID_RBTDB(rbtdb));
6030
             /*
              * In a zone database, a node flagged 'nsec3' may only receive
              * NSEC3 rdatasets or rdatasets covering NSEC3, and any other
              * node must receive neither.
              */
6031         if (rbtdb->common.methods == &zone_methods)
6032                 REQUIRE(((rbtnode->nsec3 &&
6033                           (rdataset->type == dns_rdatatype_nsec3 ||
6034                            rdataset->covers == dns_rdatatype_nsec3)) ||
6035                          (!rbtnode->nsec3 &&
6036                            rdataset->type != dns_rdatatype_nsec3 &&
6037                            rdataset->covers != dns_rdatatype_nsec3)));
6038
             /*
              * Without a version, default 'now' to the current time.  With
              * a version 'now' is zeroed, so the TTL stored below is
              * rdataset->ttl unchanged.
              */
6039         if (rbtversion == NULL) {
6040                 if (now == 0)
6041                         isc_stdtime_get(&now);
6042         } else
6043                 now = 0;
6044
             /*
              * Build the slab, reserving sizeof(rdatasetheader_t) bytes so
              * that region.base can be used as the header initialised below.
              */
6045         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6046                                             &region,
6047                                             sizeof(rdatasetheader_t));
6048         if (result != ISC_R_SUCCESS)
6049                 return (result);
6050
6051         newheader = (rdatasetheader_t *)region.base;
6052         init_rdataset(rbtdb, newheader);
6053         set_ttl(rbtdb, newheader, rdataset->ttl + now);
6054         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6055                                                 rdataset->covers);
6056         newheader->attributes = 0;
6057         newheader->noqname = NULL;
6058         newheader->closest = NULL;
6059         newheader->count = init_count++;
6060         newheader->trust = rdataset->trust;
6061         newheader->additional_auth = NULL;
6062         newheader->additional_glue = NULL;
6063         newheader->last_used = now;
6064         newheader->node = rbtnode;
6065         if (rbtversion != NULL) {
6066                 newheader->serial = rbtversion->serial;
                     /* 'now' was already zeroed above for versioned adds. */
6067                 now = 0;
6068
6069                 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6070                         newheader->attributes |= RDATASET_ATTR_RESIGN;
6071                         newheader->resign = rdataset->resign;
6072                 } else
6073                         newheader->resign = 0;
6074         } else {
                     /*
                      * No version: copy the negative-response attributes and
                      * attach any NOQNAME / CLOSEST proofs.  If attaching a
                      * proof fails, the new header is freed and the error is
                      * returned before any lock is taken.
                      */
6075                 newheader->serial = 1;
6076                 newheader->resign = 0;
6077                 if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0)
6078                         newheader->attributes |= RDATASET_ATTR_NEGATIVE;
6079                 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
6080                         newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6081                 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6082                         newheader->attributes |= RDATASET_ATTR_OPTOUT;
6083                 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6084                         result = addnoqname(rbtdb, newheader, rdataset);
6085                         if (result != ISC_R_SUCCESS) {
6086                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6087                                               newheader);
6088                                 return (result);
6089                         }
6090                 }
6091                 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6092                         result = addclosest(rbtdb, newheader, rdataset);
6093                         if (result != ISC_R_SUCCESS) {
6094                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6095                                               newheader);
6096                                 return (result);
6097                         }
6098                 }
6099         }
6100
6101         /*
6102          * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6103          * just DNAME for the cache), then we need to set the callback bit
6104          * on the node.
6105          */
6106         if (delegating_type(rbtdb, rbtnode, rdataset->type))
6107                 delegating = ISC_TRUE;
6108         else
6109                 delegating = ISC_FALSE;
6110
6111         /*
6112          * If we're adding a delegation type or the DB is a cache in an overmem
6113          * state, hold an exclusive lock on the tree.  In the latter case
6114          * the lock does not necessarily have to be acquired but it will help
6115          * purge stale entries more effectively.
6116          */
6117         if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx))
6118                 cache_is_overmem = ISC_TRUE;
6119         if (delegating || cache_is_overmem) {
6120                 tree_locked = ISC_TRUE;
6121                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6122         }
6123
             /*
              * The overmem purge runs with the tree lock held but before
              * this node's lock is taken.
              */
6124         if (cache_is_overmem)
6125                 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6126
6127         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6128                   isc_rwlocktype_write);
6129
             /*
              * When rrset statistics are enabled, flag the header as counted
              * and update the statistics for it.
              */
6130         if (rbtdb->rrsetstats != NULL) {
6131                 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6132                 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6133         }
6134
6135         if (IS_CACHE(rbtdb)) {
6136                 if (tree_locked)
6137                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6138
                     /*
                      * Look at element 1 of this lock bucket's heap
                      * (presumably the heap's top entry -- confirm against
                      * isc_heap) and expire it if its TTL is RBTDB_VIRTUAL
                      * or more in the past.
                      */
6139                 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6140                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6141                         expire_header(rbtdb, header, tree_locked);
6142
6143                 /*
6144                  * If we've been holding a write lock on the tree just for
6145                  * cleaning, we can release it now.  However, we still need the
6146                  * node lock.
6147                  */
6148                 if (tree_locked && !delegating) {
6149                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6150                         tree_locked = ISC_FALSE;
6151                 }
6152         }
6153
             /*
              * NOTE(review): the ISC_FALSE argument is presumably add()'s
              * 'loading' flag -- confirm against add()'s signature.
              * The node's find_callback bit is only set when add() succeeds.
              */
6154         result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
6155                      addedrdataset, now);
6156         if (result == ISC_R_SUCCESS && delegating)
6157                 rbtnode->find_callback = 1;
6158
6159         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6160                     isc_rwlocktype_write);
6161
             /* Still held here only in the delegating case (or non-cache). */
6162         if (tree_locked)
6163                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6164
6165         /*
6166          * Update the zone's secure status.  If version is non-NULL
6167          * this is deferred until closeversion() is called.
6168          */
6169         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6170                 iszonesecure(db, version, rbtdb->origin_node);
6171
6172         return (result);
6173 }
6174
6175 static isc_result_t
6176 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6177                  dns_rdataset_t *rdataset, unsigned int options,
6178                  dns_rdataset_t *newrdataset)
6179 {
6180         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6181         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6182         rbtdb_version_t *rbtversion = version;
6183         rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6184         unsigned char *subresult;
6185         isc_region_t region;
6186         isc_result_t result;
6187         rbtdb_changed_t *changed;
6188
6189         REQUIRE(VALID_RBTDB(rbtdb));
6190
6191         if (rbtdb->common.methods == &zone_methods)
6192                 REQUIRE(((rbtnode->nsec3 &&
6193                           (rdataset->type == dns_rdatatype_nsec3 ||
6194                            rdataset->covers == dns_rdatatype_nsec3)) ||
6195                          (!rbtnode->nsec3 &&
6196                            rdataset->type != dns_rdatatype_nsec3 &&
6197                            rdataset->covers != dns_rdatatype_nsec3)));
6198
6199         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6200                                             &region,
6201                                             sizeof(rdatasetheader_t));
6202         if (result != ISC_R_SUCCESS)
6203                 return (result);
6204         newheader = (rdatasetheader_t *)region.base;
6205         init_rdataset(rbtdb, newheader);
6206         set_ttl(rbtdb, newheader, rdataset->ttl);
6207         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6208                                                 rdataset->covers);
6209         newheader->attributes = 0;
6210         newheader->serial = rbtversion->serial;
6211         newheader->trust = 0;
6212         newheader->noqname = NULL;
6213         newheader->closest = NULL;
6214         newheader->count = init_count++;
6215         newheader->additional_auth = NULL;
6216         newheader->additional_glue = NULL;
6217         newheader->last_used = 0;
6218         newheader->node = rbtnode;
6219         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6220                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6221                 newheader->resign = rdataset->resign;
6222         } else
6223                 newheader->resign = 0;
6224
6225         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6226                   isc_rwlocktype_write);
6227
6228         changed = add_changed(rbtdb, rbtversion, rbtnode);
6229         if (changed == NULL) {
6230                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6231                 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6232                             isc_rwlocktype_write);
6233                 return (ISC_R_NOMEMORY);
6234         }
6235
6236         topheader_prev = NULL;
6237         for (topheader = rbtnode->data;
6238              topheader != NULL;
6239              topheader = topheader->next) {
6240                 if (topheader->type == newheader->type)
6241                         break;
6242                 topheader_prev = topheader;
6243         }
6244         /*
6245          * If header isn't NULL, we've found the right type.  There may be
6246          * IGNORE rdatasets between the top of the chain and the first real
6247          * data.  We skip over them.
6248          */
6249         header = topheader;
6250         while (header != NULL && IGNORE(header))
6251                 header = header->down;
6252         if (header != NULL && EXISTS(header)) {
6253                 unsigned int flags = 0;
6254                 subresult = NULL;
6255                 result = ISC_R_SUCCESS;
6256                 if ((options & DNS_DBSUB_EXACT) != 0) {
6257                         flags |= DNS_RDATASLAB_EXACT;
6258                         if (newheader->rdh_ttl != header->rdh_ttl)
6259                                 result = DNS_R_NOTEXACT;
6260                 }
6261                 if (result == ISC_R_SUCCESS)
6262                         result = dns_rdataslab_subtract(
6263                                         (unsigned char *)header,
6264                                         (unsigned char *)newheader,
6265                                         (unsigned int)(sizeof(*newheader)),
6266                                         rbtdb->common.mctx,
6267                                         rbtdb->common.rdclass,
6268                                         (dns_rdatatype_t)header->type,
6269                                         flags, &subresult);
6270                 if (result == ISC_R_SUCCESS) {
6271                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6272                         newheader = (rdatasetheader_t *)subresult;
6273                         init_rdataset(rbtdb, newheader);
6274                         /*
6275                          * We have to set the serial since the rdataslab
6276                          * subtraction routine copies the reserved portion of
6277                          * header, not newheader.
6278                          */
6279                         newheader->serial = rbtversion->serial;
6280                         /*
6281                          * XXXJT: dns_rdataslab_subtract() copied the pointers
6282                          * to additional info.  We need to clear these fields
6283                          * to avoid having duplicated references.
6284                          */
6285                         newheader->additional_auth = NULL;
6286                         newheader->additional_glue = NULL;
6287                 } else if (result == DNS_R_NXRRSET) {
6288                         /*
6289                          * This subtraction would remove all of the rdata;
6290                          * add a nonexistent header instead.
6291                          */
6292                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6293                         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6294                         if (newheader == NULL) {
6295                                 result = ISC_R_NOMEMORY;
6296                                 goto unlock;
6297                         }
6298                         set_ttl(rbtdb, newheader, 0);
6299                         newheader->type = topheader->type;
6300                         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6301                         newheader->trust = 0;
6302                         newheader->serial = rbtversion->serial;
6303                         newheader->noqname = NULL;
6304                         newheader->closest = NULL;
6305                         newheader->count = 0;
6306                         newheader->additional_auth = NULL;
6307                         newheader->additional_glue = NULL;
6308                         newheader->node = rbtnode;
6309                         newheader->resign = 0;
6310                         newheader->last_used = 0;
6311                 } else {
6312                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6313                         goto unlock;
6314                 }
6315
6316                 /*
6317                  * If we're here, we want to link newheader in front of
6318                  * topheader.
6319                  */
6320                 INSIST(rbtversion->serial >= topheader->serial);
6321                 if (topheader_prev != NULL)
6322                         topheader_prev->next = newheader;
6323                 else
6324                         rbtnode->data = newheader;
6325                 newheader->next = topheader->next;
6326                 newheader->down = topheader;
6327                 topheader->next = newheader;
6328                 rbtnode->dirty = 1;
6329                 changed->dirty = ISC_TRUE;
6330         } else {
6331                 /*
6332                  * The rdataset doesn't exist, so we don't need to do anything
6333                  * to satisfy the deletion request.
6334                  */
6335                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6336                 if ((options & DNS_DBSUB_EXACT) != 0)
6337                         result = DNS_R_NOTEXACT;
6338                 else
6339                         result = DNS_R_UNCHANGED;
6340         }
6341
6342         if (result == ISC_R_SUCCESS && newrdataset != NULL)
6343                 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6344
6345  unlock:
6346         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6347                     isc_rwlocktype_write);
6348
6349         /*
6350          * Update the zone's secure status.  If version is non-NULL
6351          * this is deferred until closeversion() is called.
6352          */
6353         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6354                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6355
6356         return (result);
6357 }
6358
6359 static isc_result_t
6360 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6361                dns_rdatatype_t type, dns_rdatatype_t covers)
6362 {
6363         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6364         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6365         rbtdb_version_t *rbtversion = version;
6366         isc_result_t result;
6367         rdatasetheader_t *newheader;
6368
6369         REQUIRE(VALID_RBTDB(rbtdb));
6370
6371         if (type == dns_rdatatype_any)
6372                 return (ISC_R_NOTIMPLEMENTED);
6373         if (type == dns_rdatatype_rrsig && covers == 0)
6374                 return (ISC_R_NOTIMPLEMENTED);
6375
6376         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6377         if (newheader == NULL)
6378                 return (ISC_R_NOMEMORY);
6379         set_ttl(rbtdb, newheader, 0);
6380         newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6381         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6382         newheader->trust = 0;
6383         newheader->noqname = NULL;
6384         newheader->closest = NULL;
6385         newheader->additional_auth = NULL;
6386         newheader->additional_glue = NULL;
6387         if (rbtversion != NULL)
6388                 newheader->serial = rbtversion->serial;
6389         else
6390                 newheader->serial = 0;
6391         newheader->count = 0;
6392         newheader->last_used = 0;
6393         newheader->node = rbtnode;
6394
6395         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6396                   isc_rwlocktype_write);
6397
6398         result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6399                      ISC_FALSE, NULL, 0);
6400
6401         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6402                     isc_rwlocktype_write);
6403
6404         /*
6405          * Update the zone's secure status.  If version is non-NULL
6406          * this is deferred until closeversion() is called.
6407          */
6408         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6409                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6410
6411         return (result);
6412 }
6413
6414 static isc_result_t
6415 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6416         rbtdb_load_t *loadctx = arg;
6417         dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6418         dns_rbtnode_t *node;
6419         isc_result_t result;
6420         isc_region_t region;
6421         rdatasetheader_t *newheader;
6422
6423         /*
6424          * This routine does no node locking.  See comments in
6425          * 'load' below for more information on loading and
6426          * locking.
6427          */
6428
6429
6430         /*
6431          * SOA records are only allowed at top of zone.
6432          */
6433         if (rdataset->type == dns_rdatatype_soa &&
6434             !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6435                 return (DNS_R_NOTZONETOP);
6436
6437         if (rdataset->type != dns_rdatatype_nsec3 &&
6438             rdataset->covers != dns_rdatatype_nsec3)
6439                 add_empty_wildcards(rbtdb, name);
6440
6441         if (dns_name_iswildcard(name)) {
6442                 /*
6443                  * NS record owners cannot legally be wild cards.
6444                  */
6445                 if (rdataset->type == dns_rdatatype_ns)
6446                         return (DNS_R_INVALIDNS);
6447                 /*
6448                  * NSEC3 record owners cannot legally be wild cards.
6449                  */
6450                 if (rdataset->type == dns_rdatatype_nsec3)
6451                         return (DNS_R_INVALIDNSEC3);
6452                 result = add_wildcard_magic(rbtdb, name);
6453                 if (result != ISC_R_SUCCESS)
6454                         return (result);
6455         }
6456
6457         node = NULL;
6458         if (rdataset->type == dns_rdatatype_nsec3 ||
6459             rdataset->covers == dns_rdatatype_nsec3) {
6460                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6461                 if (result == ISC_R_SUCCESS)
6462                         node->nsec3 = 1;
6463         } else {
6464                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
6465                 if (result == ISC_R_SUCCESS)
6466                         node->nsec3 = 0;
6467         }
6468         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
6469                 return (result);
6470         if (result != ISC_R_EXISTS) {
6471                 dns_name_t foundname;
6472                 dns_name_init(&foundname, NULL);
6473                 dns_rbt_namefromnode(node, &foundname);
6474 #ifdef DNS_RBT_USEHASH
6475                 node->locknum = node->hashval % rbtdb->node_lock_count;
6476 #else
6477                 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6478                         rbtdb->node_lock_count;
6479 #endif
6480         }
6481
6482         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6483                                             &region,
6484                                             sizeof(rdatasetheader_t));
6485         if (result != ISC_R_SUCCESS)
6486                 return (result);
6487         newheader = (rdatasetheader_t *)region.base;
6488         init_rdataset(rbtdb, newheader);
6489         set_ttl(rbtdb, newheader,
6490                 rdataset->ttl + loadctx->now); /* XXX overflow check */
6491         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6492                                                 rdataset->covers);
6493         newheader->attributes = 0;
6494         newheader->trust = rdataset->trust;
6495         newheader->serial = 1;
6496         newheader->noqname = NULL;
6497         newheader->closest = NULL;
6498         newheader->count = init_count++;
6499         newheader->additional_auth = NULL;
6500         newheader->additional_glue = NULL;
6501         newheader->last_used = 0;
6502         newheader->node = node;
6503         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6504                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6505                 newheader->resign = rdataset->resign;
6506         } else
6507                 newheader->resign = 0;
6508
6509         result = add(rbtdb, node, rbtdb->current_version, newheader,
6510                      DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
6511         if (result == ISC_R_SUCCESS &&
6512             delegating_type(rbtdb, node, rdataset->type))
6513                 node->find_callback = 1;
6514         else if (result == DNS_R_UNCHANGED)
6515                 result = ISC_R_SUCCESS;
6516
6517         return (result);
6518 }
6519
6520 static isc_result_t
6521 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6522         rbtdb_load_t *loadctx;
6523         dns_rbtdb_t *rbtdb;
6524
6525         rbtdb = (dns_rbtdb_t *)db;
6526
6527         REQUIRE(VALID_RBTDB(rbtdb));
6528
6529         loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
6530         if (loadctx == NULL)
6531                 return (ISC_R_NOMEMORY);
6532
6533         loadctx->rbtdb = rbtdb;
6534         if (IS_CACHE(rbtdb))
6535                 isc_stdtime_get(&loadctx->now);
6536         else
6537                 loadctx->now = 0;
6538
6539         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6540
6541         REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
6542                 == 0);
6543         rbtdb->attributes |= RBTDB_ATTR_LOADING;
6544
6545         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6546
6547         *addp = loading_addrdataset;
6548         *dbloadp = loadctx;
6549
6550         return (ISC_R_SUCCESS);
6551 }
6552
6553 static isc_result_t
6554 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
6555         rbtdb_load_t *loadctx;
6556         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6557
6558         REQUIRE(VALID_RBTDB(rbtdb));
6559         REQUIRE(dbloadp != NULL);
6560         loadctx = *dbloadp;
6561         REQUIRE(loadctx->rbtdb == rbtdb);
6562
6563         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6564
6565         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
6566         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
6567
6568         rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
6569         rbtdb->attributes |= RBTDB_ATTR_LOADED;
6570
6571         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6572
6573         /*
6574          * If there's a KEY rdataset at the zone origin containing a
6575          * zone key, we consider the zone secure.
6576          */
6577         if (! IS_CACHE(rbtdb))
6578                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6579
6580         *dbloadp = NULL;
6581
6582         isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
6583
6584         return (ISC_R_SUCCESS);
6585 }
6586
6587 static isc_result_t
6588 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
6589      dns_masterformat_t masterformat) {
6590         dns_rbtdb_t *rbtdb;
6591
6592         rbtdb = (dns_rbtdb_t *)db;
6593
6594         REQUIRE(VALID_RBTDB(rbtdb));
6595
6596         return (dns_master_dump2(rbtdb->common.mctx, db, version,
6597                                  &dns_master_style_default,
6598                                  filename, masterformat));
6599 }
6600
6601 static void
6602 delete_callback(void *data, void *arg) {
6603         dns_rbtdb_t *rbtdb = arg;
6604         rdatasetheader_t *current, *next;
6605         unsigned int locknum;
6606
6607         current = data;
6608         locknum = current->node->locknum;
6609         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6610         while (current != NULL) {
6611                 next = current->next;
6612                 free_rdataset(rbtdb, rbtdb->common.mctx, current);
6613                 current = next;
6614         }
6615         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6616 }
6617
6618 static isc_boolean_t
6619 issecure(dns_db_t *db) {
6620         dns_rbtdb_t *rbtdb;
6621         isc_boolean_t secure;
6622
6623         rbtdb = (dns_rbtdb_t *)db;
6624
6625         REQUIRE(VALID_RBTDB(rbtdb));
6626
6627         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6628         secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
6629         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6630
6631         return (secure);
6632 }
6633
6634 static isc_boolean_t
6635 isdnssec(dns_db_t *db) {
6636         dns_rbtdb_t *rbtdb;
6637         isc_boolean_t dnssec;
6638
6639         rbtdb = (dns_rbtdb_t *)db;
6640
6641         REQUIRE(VALID_RBTDB(rbtdb));
6642
6643         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6644         dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
6645         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6646
6647         return (dnssec);
6648 }
6649
6650 static unsigned int
6651 nodecount(dns_db_t *db) {
6652         dns_rbtdb_t *rbtdb;
6653         unsigned int count;
6654
6655         rbtdb = (dns_rbtdb_t *)db;
6656
6657         REQUIRE(VALID_RBTDB(rbtdb));
6658
6659         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6660         count = dns_rbt_nodecount(rbtdb->tree);
6661         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6662
6663         return (count);
6664 }
6665
6666 static void
6667 settask(dns_db_t *db, isc_task_t *task) {
6668         dns_rbtdb_t *rbtdb;
6669
6670         rbtdb = (dns_rbtdb_t *)db;
6671
6672         REQUIRE(VALID_RBTDB(rbtdb));
6673
6674         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6675         if (rbtdb->task != NULL)
6676                 isc_task_detach(&rbtdb->task);
6677         if (task != NULL)
6678                 isc_task_attach(task, &rbtdb->task);
6679         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6680 }
6681
6682 static isc_boolean_t
6683 ispersistent(dns_db_t *db) {
6684         UNUSED(db);
6685         return (ISC_FALSE);
6686 }
6687
6688 static isc_result_t
6689 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
6690         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6691         dns_rbtnode_t *onode;
6692         isc_result_t result = ISC_R_SUCCESS;
6693
6694         REQUIRE(VALID_RBTDB(rbtdb));
6695         REQUIRE(nodep != NULL && *nodep == NULL);
6696
6697         /* Note that the access to origin_node doesn't require a DB lock */
6698         onode = (dns_rbtnode_t *)rbtdb->origin_node;
6699         if (onode != NULL) {
6700                 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
6701                 new_reference(rbtdb, onode);
6702                 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
6703
6704                 *nodep = rbtdb->origin_node;
6705         } else {
6706                 INSIST(IS_CACHE(rbtdb));
6707                 result = ISC_R_NOTFOUND;
6708         }
6709
6710         return (result);
6711 }
6712
6713 static isc_result_t
6714 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
6715                    isc_uint8_t *flags, isc_uint16_t *iterations,
6716                    unsigned char *salt, size_t *salt_length)
6717 {
6718         dns_rbtdb_t *rbtdb;
6719         isc_result_t result = ISC_R_NOTFOUND;
6720         rbtdb_version_t *rbtversion = version;
6721
6722         rbtdb = (dns_rbtdb_t *)db;
6723
6724         REQUIRE(VALID_RBTDB(rbtdb));
6725
6726         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6727
6728         if (rbtversion == NULL)
6729                 rbtversion = rbtdb->current_version;
6730
6731         if (rbtversion->havensec3) {
6732                 if (hash != NULL)
6733                         *hash = rbtversion->hash;
6734                 if (salt != NULL && salt_length != NULL) {
6735                         REQUIRE(*salt_length >= rbtversion->salt_length);
6736                         memcpy(salt, rbtversion->salt, rbtversion->salt_length);
6737                 }
6738                 if (salt_length != NULL)
6739                         *salt_length = rbtversion->salt_length;
6740                 if (iterations != NULL)
6741                         *iterations = rbtversion->iterations;
6742                 if (flags != NULL)
6743                         *flags = rbtversion->flags;
6744                 result = ISC_R_SUCCESS;
6745         }
6746         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6747
6748         return (result);
6749 }
6750
6751 static isc_result_t
6752 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
6753         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6754         isc_stdtime_t oldresign;
6755         isc_result_t result = ISC_R_SUCCESS;
6756         rdatasetheader_t *header;
6757
6758         REQUIRE(VALID_RBTDB(rbtdb));
6759         REQUIRE(!IS_CACHE(rbtdb));
6760         REQUIRE(rdataset != NULL);
6761
6762         header = rdataset->private3;
6763         header--;
6764
6765         NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6766                   isc_rwlocktype_write);
6767
6768         oldresign = header->resign;
6769         header->resign = resign;
6770         if (header->heap_index != 0) {
6771                 INSIST(RESIGN(header));
6772                 if (resign == 0) {
6773                         isc_heap_delete(rbtdb->heaps[header->node->locknum],
6774                                         header->heap_index);
6775                         header->heap_index = 0;
6776                 } else if (resign < oldresign)
6777                         isc_heap_increased(rbtdb->heaps[header->node->locknum],
6778                                            header->heap_index);
6779                 else
6780                         isc_heap_decreased(rbtdb->heaps[header->node->locknum],
6781                                            header->heap_index);
6782         } else if (resign && header->heap_index == 0) {
6783                 header->attributes |= RDATASET_ATTR_RESIGN;
6784                 result = resign_insert(rbtdb, header->node->locknum, header);
6785         }
6786         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6787                     isc_rwlocktype_write);
6788         return (result);
6789 }
6790
6791 static isc_result_t
6792 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
6793                dns_name_t *foundname)
6794 {
6795         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6796         rdatasetheader_t *header = NULL, *this;
6797         unsigned int i;
6798         isc_result_t result = ISC_R_NOTFOUND;
6799         unsigned int locknum;
6800
6801         REQUIRE(VALID_RBTDB(rbtdb));
6802
6803         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6804
6805         for (i = 0; i < rbtdb->node_lock_count; i++) {
6806                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
6807                 this = isc_heap_element(rbtdb->heaps[i], 1);
6808                 if (this == NULL) {
6809                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6810                                     isc_rwlocktype_read);
6811                         continue;
6812                 }
6813                 if (header == NULL)
6814                         header = this;
6815                 else if (isc_serial_lt(this->resign, header->resign)) {
6816                         locknum = header->node->locknum;
6817                         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
6818                                     isc_rwlocktype_read);
6819                         header = this;
6820                 } else
6821                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6822                                     isc_rwlocktype_read);
6823         }
6824
6825         if (header == NULL)
6826                 goto unlock;
6827
6828         bind_rdataset(rbtdb, header->node, header, 0, rdataset);
6829
6830         if (foundname != NULL)
6831                 dns_rbt_fullnamefromnode(header->node, foundname);
6832
6833         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6834                     isc_rwlocktype_read);
6835
6836         result = ISC_R_SUCCESS;
6837
6838  unlock:
6839         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6840
6841         return (result);
6842 }
6843
6844 static void
6845 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
6846 {
6847         rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
6848         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6849         dns_rbtnode_t *node;
6850         rdatasetheader_t *header;
6851
6852         REQUIRE(VALID_RBTDB(rbtdb));
6853         REQUIRE(rdataset != NULL);
6854         REQUIRE(rbtdb->future_version == rbtversion);
6855         REQUIRE(rbtversion->writer);
6856
6857         node = rdataset->private2;
6858         header = rdataset->private3;
6859         header--;
6860
6861         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6862         NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
6863                   isc_rwlocktype_write);
6864         /*
6865          * Delete from heap and save to re-signed list so that it can
6866          * be restored if we backout of this change.
6867          */
6868         new_reference(rbtdb, node);
6869         isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
6870         header->heap_index = 0;
6871         ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
6872
6873         NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
6874                     isc_rwlocktype_write);
6875         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6876 }
6877
6878 static dns_stats_t *
6879 getrrsetstats(dns_db_t *db) {
6880         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6881
6882         REQUIRE(VALID_RBTDB(rbtdb));
6883         REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
6884
6885         return (rbtdb->rrsetstats);
6886 }
6887
/*
 * Method table installed by the create routine for every database type
 * other than dns_dbtype_cache (zone and stub databases).
 *
 * The initializer is positional, so the order of entries must match the
 * member order of dns_dbmethods_t.  NOTE(review): the member names are
 * not visible here; confirm against the dns_dbmethods_t declaration
 * before adding or reordering entries.
 */
static dns_dbmethods_t zone_methods = {
        attach,
        detach,
        beginload,
        endload,
        dump,
        currentversion,
        newversion,
        attachversion,
        closeversion,
        findnode,
        zone_find,
        zone_findzonecut,
        attachnode,
        detachnode,
        expirenode,
        printnode,
        createiterator,
        zone_findrdataset,
        allrdatasets,
        addrdataset,
        subtractrdataset,
        deleterdataset,
        issecure,
        nodecount,
        ispersistent,
        overmem,
        settask,
        getoriginnode,
        NULL,           /* no handler supplied for this slot */
        getnsec3parameters,
        findnsec3node,
        setsigningtime,
        getsigningtime,
        resigned,
        isdnssec,
        NULL            /* cache_methods supplies getrrsetstats here */
};
6926
/*
 * Method table installed by the create routine for dns_dbtype_cache
 * databases.
 *
 * The initializer is positional, so the order of entries must match the
 * member order of dns_dbmethods_t.  NOTE(review): the member names are
 * not visible here; confirm against the dns_dbmethods_t declaration
 * before adding or reordering entries.
 */
static dns_dbmethods_t cache_methods = {
        attach,
        detach,
        beginload,
        endload,
        dump,
        currentversion,
        newversion,
        attachversion,
        closeversion,
        findnode,
        cache_find,
        cache_findzonecut,
        attachnode,
        detachnode,
        expirenode,
        printnode,
        createiterator,
        cache_findrdataset,
        allrdatasets,
        addrdataset,
        subtractrdataset,
        deleterdataset,
        issecure,
        nodecount,
        ispersistent,
        overmem,
        settask,
        getoriginnode,
        NULL,           /* no handler supplied for this slot */
        NULL,           /* zone_methods: getnsec3parameters */
        NULL,           /* zone_methods: findnsec3node */
        NULL,           /* zone_methods: setsigningtime */
        NULL,           /* zone_methods: getsigningtime */
        NULL,           /* zone_methods: resigned */
        isdnssec,
        getrrsetstats
};
6965
6966 isc_result_t
6967 #ifdef DNS_RBTDB_VERSION64
6968 dns_rbtdb64_create
6969 #else
6970 dns_rbtdb_create
6971 #endif
6972                 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
6973                  dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
6974                  void *driverarg, dns_db_t **dbp)
6975 {
6976         dns_rbtdb_t *rbtdb;
6977         isc_result_t result;
6978         int i;
6979         dns_name_t name;
6980         isc_boolean_t (*sooner)(void *, void *);
6981         isc_mem_t *hmctx = mctx;
6982
6983         /* Keep the compiler happy. */
6984         UNUSED(driverarg);
6985
6986         rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
6987         if (rbtdb == NULL)
6988                 return (ISC_R_NOMEMORY);
6989
6990         /*
6991          * If argv[0] exists, it points to a memory context to use for heap
6992          */
6993         if (argc != 0)
6994                 hmctx = (isc_mem_t *) argv[0];
6995
6996         memset(rbtdb, '\0', sizeof(*rbtdb));
6997         dns_name_init(&rbtdb->common.origin, NULL);
6998         rbtdb->common.attributes = 0;
6999         if (type == dns_dbtype_cache) {
7000                 rbtdb->common.methods = &cache_methods;
7001                 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
7002         } else if (type == dns_dbtype_stub) {
7003                 rbtdb->common.methods = &zone_methods;
7004                 rbtdb->common.attributes |= DNS_DBATTR_STUB;
7005         } else
7006                 rbtdb->common.methods = &zone_methods;
7007         rbtdb->common.rdclass = rdclass;
7008         rbtdb->common.mctx = NULL;
7009
7010         result = RBTDB_INITLOCK(&rbtdb->lock);
7011         if (result != ISC_R_SUCCESS)
7012                 goto cleanup_rbtdb;
7013
7014         result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
7015         if (result != ISC_R_SUCCESS)
7016                 goto cleanup_lock;
7017
7018         /*
7019          * Initialize node_lock_count in a generic way to support future
7020          * extension which allows the user to specify this value on creation.
7021          * Note that when specified for a cache DB it must be larger than 1
7022          * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
7023          */
7024         if (rbtdb->node_lock_count == 0) {
7025                 if (IS_CACHE(rbtdb))
7026                         rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
7027                 else
7028                         rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
7029         } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
7030                 result = ISC_R_RANGE;
7031                 goto cleanup_tree_lock;
7032         }
7033         INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
7034         rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
7035                                         sizeof(rbtdb_nodelock_t));
7036         if (rbtdb->node_locks == NULL) {
7037                 result = ISC_R_NOMEMORY;
7038                 goto cleanup_tree_lock;
7039         }
7040
7041         rbtdb->rrsetstats = NULL;
7042         if (IS_CACHE(rbtdb)) {
7043                 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
7044                 if (result != ISC_R_SUCCESS)
7045                         goto cleanup_node_locks;
7046                 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
7047                                                sizeof(rdatasetheaderlist_t));
7048                 if (rbtdb->rdatasets == NULL) {
7049                         result = ISC_R_NOMEMORY;
7050                         goto cleanup_rrsetstats;
7051                 }
7052                 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7053                         ISC_LIST_INIT(rbtdb->rdatasets[i]);
7054         } else
7055                 rbtdb->rdatasets = NULL;
7056
7057         /*
7058          * Create the heaps.
7059          */
7060         rbtdb->heaps = isc_mem_get(hmctx, rbtdb->node_lock_count *
7061                                    sizeof(isc_heap_t *));
7062         if (rbtdb->heaps == NULL) {
7063                 result = ISC_R_NOMEMORY;
7064                 goto cleanup_rdatasets;
7065         }
7066         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7067                 rbtdb->heaps[i] = NULL;
7068         sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
7069         for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
7070                 result = isc_heap_create(hmctx, sooner, set_index, 0,
7071                                          &rbtdb->heaps[i]);
7072                 if (result != ISC_R_SUCCESS)
7073                         goto cleanup_heaps;
7074         }
7075
7076         /*
7077          * Create deadnode lists.
7078          */
7079         rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
7080                                        sizeof(rbtnodelist_t));
7081         if (rbtdb->deadnodes == NULL) {
7082                 result = ISC_R_NOMEMORY;
7083                 goto cleanup_heaps;
7084         }
7085         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7086                 ISC_LIST_INIT(rbtdb->deadnodes[i]);
7087
7088         rbtdb->active = rbtdb->node_lock_count;
7089
7090         for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
7091                 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
7092                 if (result == ISC_R_SUCCESS) {
7093                         result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
7094                         if (result != ISC_R_SUCCESS)
7095                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7096                 }
7097                 if (result != ISC_R_SUCCESS) {
7098                         while (i-- > 0) {
7099                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7100                                 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7101                                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7102                         }
7103                         goto cleanup_deadnodes;
7104                 }
7105                 rbtdb->node_locks[i].exiting = ISC_FALSE;
7106         }
7107
7108         /*
7109          * Attach to the mctx.  The database will persist so long as there
7110          * are references to it, and attaching to the mctx ensures that our
7111          * mctx won't disappear out from under us.
7112          */
7113         isc_mem_attach(mctx, &rbtdb->common.mctx);
7114         isc_mem_attach(hmctx, &rbtdb->hmctx);
7115
7116         /*
7117          * Must be initialized before free_rbtdb() is called.
7118          */
7119         isc_ondestroy_init(&rbtdb->common.ondest);
7120
7121         /*
7122          * Make a copy of the origin name.
7123          */
7124         result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7125         if (result != ISC_R_SUCCESS) {
7126                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7127                 return (result);
7128         }
7129
7130         /*
7131          * Make the Red-Black Trees.
7132          */
7133         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7134         if (result != ISC_R_SUCCESS) {
7135                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7136                 return (result);
7137         }
7138
7139         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7140         if (result != ISC_R_SUCCESS) {
7141                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7142                 return (result);
7143         }
7144
7145         /*
7146          * In order to set the node callback bit correctly in zone databases,
7147          * we need to know if the node has the origin name of the zone.
7148          * In loading_addrdataset() we could simply compare the new name
7149          * to the origin name, but this is expensive.  Also, we don't know the
7150          * node name in addrdataset(), so we need another way of knowing the
7151          * zone's top.
7152          *
7153          * We now explicitly create a node for the zone's origin, and then
7154          * we simply remember the node's address.  This is safe, because
7155          * the top-of-zone node can never be deleted, nor can its address
7156          * change.
7157          */
7158         if (!IS_CACHE(rbtdb)) {
7159                 dns_rbtnode_t *nsec3node;
7160
7161                 rbtdb->origin_node = NULL;
7162                 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7163                                          &rbtdb->origin_node);
7164                 if (result != ISC_R_SUCCESS) {
7165                         INSIST(result != ISC_R_EXISTS);
7166                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7167                         return (result);
7168                 }
7169                 rbtdb->origin_node->nsec3 = 0;
7170                 /*
7171                  * We need to give the origin node the right locknum.
7172                  */
7173                 dns_name_init(&name, NULL);
7174                 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7175 #ifdef DNS_RBT_USEHASH
7176                 rbtdb->origin_node->locknum =
7177                         rbtdb->origin_node->hashval %
7178                         rbtdb->node_lock_count;
7179 #else
7180                 rbtdb->origin_node->locknum =
7181                         dns_name_hash(&name, ISC_TRUE) %
7182                         rbtdb->node_lock_count;
7183 #endif
7184                 /*
7185                  * Add an apex node to the NSEC3 tree so that NSEC3 searches
7186                  * return partial matches when there is only a single NSEC3
7187                  * record in the tree.
7188                  */
7189                 nsec3node = NULL;
7190                 result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin,
7191                                          &nsec3node);
7192                 if (result != ISC_R_SUCCESS) {
7193                         INSIST(result != ISC_R_EXISTS);
7194                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7195                         return (result);
7196                 }
7197                 nsec3node->nsec3 = 1;
7198                 /*
7199                  * We need to give the nsec3 origin node the right locknum.
7200                  */
7201                 dns_name_init(&name, NULL);
7202                 dns_rbt_namefromnode(nsec3node, &name);
7203 #ifdef DNS_RBT_USEHASH
7204                 nsec3node->locknum = nsec3node->hashval %
7205                         rbtdb->node_lock_count;
7206 #else
7207                 nsec3node->locknum = dns_name_hash(&name, ISC_TRUE) %
7208                         rbtdb->node_lock_count;
7209 #endif
7210         }
7211
7212         /*
7213          * Misc. Initialization.
7214          */
7215         result = isc_refcount_init(&rbtdb->references, 1);
7216         if (result != ISC_R_SUCCESS) {
7217                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7218                 return (result);
7219         }
7220         rbtdb->attributes = 0;
7221         rbtdb->task = NULL;
7222
7223         /*
7224          * Version Initialization.
7225          */
7226         rbtdb->current_serial = 1;
7227         rbtdb->least_serial = 1;
7228         rbtdb->next_serial = 2;
7229         rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7230         if (rbtdb->current_version == NULL) {
7231                 isc_refcount_decrement(&rbtdb->references, NULL);
7232                 isc_refcount_destroy(&rbtdb->references);
7233                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7234                 return (ISC_R_NOMEMORY);
7235         }
7236         rbtdb->current_version->secure = dns_db_insecure;
7237         rbtdb->current_version->havensec3 = ISC_FALSE;
7238         rbtdb->current_version->flags = 0;
7239         rbtdb->current_version->iterations = 0;
7240         rbtdb->current_version->hash = 0;
7241         rbtdb->current_version->salt_length = 0;
7242         memset(rbtdb->current_version->salt, 0,
7243                sizeof(rbtdb->current_version->salt));
7244         rbtdb->future_version = NULL;
7245         ISC_LIST_INIT(rbtdb->open_versions);
7246         /*
7247          * Keep the current version in the open list so that list operation
7248          * won't happen in normal lookup operations.
7249          */
7250         PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7251
7252         rbtdb->common.magic = DNS_DB_MAGIC;
7253         rbtdb->common.impmagic = RBTDB_MAGIC;
7254
7255         *dbp = (dns_db_t *)rbtdb;
7256
7257         return (ISC_R_SUCCESS);
7258
7259  cleanup_deadnodes:
7260         isc_mem_put(mctx, rbtdb->deadnodes,
7261                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7262
7263  cleanup_heaps:
7264         if (rbtdb->heaps != NULL) {
7265                 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7266                         if (rbtdb->heaps[i] != NULL)
7267                                 isc_heap_destroy(&rbtdb->heaps[i]);
7268                 isc_mem_put(mctx, rbtdb->heaps,
7269                             rbtdb->node_lock_count * sizeof(isc_heap_t *));
7270         }
7271
7272  cleanup_rdatasets:
7273         if (rbtdb->rdatasets != NULL)
7274                 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7275                             sizeof(rdatasetheaderlist_t));
7276  cleanup_rrsetstats:
7277         if (rbtdb->rrsetstats != NULL)
7278                 dns_stats_detach(&rbtdb->rrsetstats);
7279
7280  cleanup_node_locks:
7281         isc_mem_put(mctx, rbtdb->node_locks,
7282                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7283
7284  cleanup_tree_lock:
7285         isc_rwlock_destroy(&rbtdb->tree_lock);
7286
7287  cleanup_lock:
7288         RBTDB_DESTROYLOCK(&rbtdb->lock);
7289
7290  cleanup_rbtdb:
7291         isc_mem_put(mctx, rbtdb,  sizeof(*rbtdb));
7292         return (result);
7293 }
7294
7295
7296 /*
7297  * Slabbed Rdataset Methods
7298  */
7299
7300 static void
7301 rdataset_disassociate(dns_rdataset_t *rdataset) {
7302         dns_db_t *db = rdataset->private1;
7303         dns_dbnode_t *node = rdataset->private2;
7304
7305         detachnode(db, &node);
7306 }
7307
7308 static isc_result_t
7309 rdataset_first(dns_rdataset_t *rdataset) {
7310         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7311         unsigned int count;
7312
7313         count = raw[0] * 256 + raw[1];
7314         if (count == 0) {
7315                 rdataset->private5 = NULL;
7316                 return (ISC_R_NOMORE);
7317         }
7318
7319 #if DNS_RDATASET_FIXED
7320         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7321                 raw += 2 + (4 * count);
7322         else
7323 #endif
7324                 raw += 2;
7325
7326         /*
7327          * The privateuint4 field is the number of rdata beyond the
7328          * cursor position, so we decrement the total count by one
7329          * before storing it.
7330          *
7331          * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7332          * first record.  If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7333          * to the first entry in the offset table.
7334          */
7335         count--;
7336         rdataset->privateuint4 = count;
7337         rdataset->private5 = raw;
7338
7339         return (ISC_R_SUCCESS);
7340 }
7341
7342 static isc_result_t
7343 rdataset_next(dns_rdataset_t *rdataset) {
7344         unsigned int count;
7345         unsigned int length;
7346         unsigned char *raw;     /* RDATASLAB */
7347
7348         count = rdataset->privateuint4;
7349         if (count == 0)
7350                 return (ISC_R_NOMORE);
7351         count--;
7352         rdataset->privateuint4 = count;
7353
7354         /*
7355          * Skip forward one record (length + 4) or one offset (4).
7356          */
7357         raw = rdataset->private5;
7358 #if DNS_RDATASET_FIXED
7359         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7360 #endif
7361                 length = raw[0] * 256 + raw[1];
7362                 raw += length;
7363 #if DNS_RDATASET_FIXED
7364         }
7365         rdataset->private5 = raw + 4;           /* length(2) + order(2) */
7366 #else
7367         rdataset->private5 = raw + 2;           /* length(2) */
7368 #endif
7369
7370         return (ISC_R_SUCCESS);
7371 }
7372
7373 static void
7374 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7375         unsigned char *raw = rdataset->private5;        /* RDATASLAB */
7376 #if DNS_RDATASET_FIXED
7377         unsigned int offset;
7378 #endif
7379         unsigned int length;
7380         isc_region_t r;
7381         unsigned int flags = 0;
7382
7383         REQUIRE(raw != NULL);
7384
7385         /*
7386          * Find the start of the record if not already in private5
7387          * then skip the length and order fields.
7388          */
7389 #if DNS_RDATASET_FIXED
7390         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7391                 offset = (raw[0] << 24) + (raw[1] << 16) +
7392                          (raw[2] << 8) + raw[3];
7393                 raw = rdataset->private3;
7394                 raw += offset;
7395         }
7396 #endif
7397         length = raw[0] * 256 + raw[1];
7398 #if DNS_RDATASET_FIXED
7399         raw += 4;
7400 #else
7401         raw += 2;
7402 #endif
7403         if (rdataset->type == dns_rdatatype_rrsig) {
7404                 if (*raw & DNS_RDATASLAB_OFFLINE)
7405                         flags |= DNS_RDATA_OFFLINE;
7406                 length--;
7407                 raw++;
7408         }
7409         r.length = length;
7410         r.base = raw;
7411         dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7412         rdata->flags |= flags;
7413 }
7414
7415 static void
7416 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7417         dns_db_t *db = source->private1;
7418         dns_dbnode_t *node = source->private2;
7419         dns_dbnode_t *cloned_node = NULL;
7420
7421         attachnode(db, node, &cloned_node);
7422         *target = *source;
7423
7424         /*
7425          * Reset iterator state.
7426          */
7427         target->privateuint4 = 0;
7428         target->private5 = NULL;
7429 }
7430
7431 static unsigned int
7432 rdataset_count(dns_rdataset_t *rdataset) {
7433         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7434         unsigned int count;
7435
7436         count = raw[0] * 256 + raw[1];
7437
7438         return (count);
7439 }
7440
7441 static isc_result_t
7442 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7443                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7444 {
7445         dns_db_t *db = rdataset->private1;
7446         dns_dbnode_t *node = rdataset->private2;
7447         dns_dbnode_t *cloned_node;
7448         struct noqname *noqname = rdataset->private6;
7449
7450         cloned_node = NULL;
7451         attachnode(db, node, &cloned_node);
7452         nsec->methods = &rdataset_methods;
7453         nsec->rdclass = db->rdclass;
7454         nsec->type = noqname->type;
7455         nsec->covers = 0;
7456         nsec->ttl = rdataset->ttl;
7457         nsec->trust = rdataset->trust;
7458         nsec->private1 = rdataset->private1;
7459         nsec->private2 = rdataset->private2;
7460         nsec->private3 = noqname->neg;
7461         nsec->privateuint4 = 0;
7462         nsec->private5 = NULL;
7463         nsec->private6 = NULL;
7464         nsec->private7 = NULL;
7465
7466         cloned_node = NULL;
7467         attachnode(db, node, &cloned_node);
7468         nsecsig->methods = &rdataset_methods;
7469         nsecsig->rdclass = db->rdclass;
7470         nsecsig->type = dns_rdatatype_rrsig;
7471         nsecsig->covers = noqname->type;
7472         nsecsig->ttl = rdataset->ttl;
7473         nsecsig->trust = rdataset->trust;
7474         nsecsig->private1 = rdataset->private1;
7475         nsecsig->private2 = rdataset->private2;
7476         nsecsig->private3 = noqname->negsig;
7477         nsecsig->privateuint4 = 0;
7478         nsecsig->private5 = NULL;
7479         nsec->private6 = NULL;
7480         nsec->private7 = NULL;
7481
7482         dns_name_clone(&noqname->name, name);
7483
7484         return (ISC_R_SUCCESS);
7485 }
7486
7487 static isc_result_t
7488 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
7489                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7490 {
7491         dns_db_t *db = rdataset->private1;
7492         dns_dbnode_t *node = rdataset->private2;
7493         dns_dbnode_t *cloned_node;
7494         struct noqname *closest = rdataset->private7;
7495
7496         cloned_node = NULL;
7497         attachnode(db, node, &cloned_node);
7498         nsec->methods = &rdataset_methods;
7499         nsec->rdclass = db->rdclass;
7500         nsec->type = closest->type;
7501         nsec->covers = 0;
7502         nsec->ttl = rdataset->ttl;
7503         nsec->trust = rdataset->trust;
7504         nsec->private1 = rdataset->private1;
7505         nsec->private2 = rdataset->private2;
7506         nsec->private3 = closest->neg;
7507         nsec->privateuint4 = 0;
7508         nsec->private5 = NULL;
7509         nsec->private6 = NULL;
7510         nsec->private7 = NULL;
7511
7512         cloned_node = NULL;
7513         attachnode(db, node, &cloned_node);
7514         nsecsig->methods = &rdataset_methods;
7515         nsecsig->rdclass = db->rdclass;
7516         nsecsig->type = dns_rdatatype_rrsig;
7517         nsecsig->covers = closest->type;
7518         nsecsig->ttl = rdataset->ttl;
7519         nsecsig->trust = rdataset->trust;
7520         nsecsig->private1 = rdataset->private1;
7521         nsecsig->private2 = rdataset->private2;
7522         nsecsig->private3 = closest->negsig;
7523         nsecsig->privateuint4 = 0;
7524         nsecsig->private5 = NULL;
7525         nsec->private6 = NULL;
7526         nsec->private7 = NULL;
7527
7528         dns_name_clone(&closest->name, name);
7529
7530         return (ISC_R_SUCCESS);
7531 }
7532
7533 static void
7534 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
7535         dns_rbtdb_t *rbtdb = rdataset->private1;
7536         dns_rbtnode_t *rbtnode = rdataset->private2;
7537         rdatasetheader_t *header = rdataset->private3;
7538
7539         header--;
7540         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7541                   isc_rwlocktype_write);
7542         header->trust = rdataset->trust = trust;
7543         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7544                   isc_rwlocktype_write);
7545 }
7546
7547 static void
7548 rdataset_expire(dns_rdataset_t *rdataset) {
7549         dns_rbtdb_t *rbtdb = rdataset->private1;
7550         dns_rbtnode_t *rbtnode = rdataset->private2;
7551         rdatasetheader_t *header = rdataset->private3;
7552
7553         header--;
7554         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7555                   isc_rwlocktype_write);
7556         expire_header(rbtdb, header, ISC_FALSE);
7557         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7558                   isc_rwlocktype_write);
7559 }
7560
7561 /*
7562  * Rdataset Iterator Methods
7563  */
7564
7565 static void
7566 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
7567         rbtdb_rdatasetiter_t *rbtiterator;
7568
7569         rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
7570
7571         if (rbtiterator->common.version != NULL)
7572                 closeversion(rbtiterator->common.db,
7573                              &rbtiterator->common.version, ISC_FALSE);
7574         detachnode(rbtiterator->common.db, &rbtiterator->common.node);
7575         isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
7576                     sizeof(*rbtiterator));
7577
7578         *iteratorp = NULL;
7579 }
7580
7581 static isc_result_t
7582 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
7583         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7584         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7585         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7586         rbtdb_version_t *rbtversion = rbtiterator->common.version;
7587         rdatasetheader_t *header, *top_next;
7588         rbtdb_serial_t serial;
7589         isc_stdtime_t now;
7590
7591         if (IS_CACHE(rbtdb)) {
7592                 serial = 1;
7593                 now = rbtiterator->common.now;
7594         } else {
7595                 serial = rbtversion->serial;
7596                 now = 0;
7597         }
7598
7599         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7600                   isc_rwlocktype_read);
7601
7602         for (header = rbtnode->data; header != NULL; header = top_next) {
7603                 top_next = header->next;
7604                 do {
7605                         if (header->serial <= serial && !IGNORE(header)) {
7606                                 /*
7607                                  * Is this a "this rdataset doesn't exist"
7608                                  * record?  Or is it too old in the cache?
7609                                  *
7610                                  * Note: unlike everywhere else, we
7611                                  * check for now > header->rdh_ttl instead
7612                                  * of now >= header->rdh_ttl.  This allows
7613                                  * ANY and RRSIG queries for 0 TTL
7614                                  * rdatasets to work.
7615                                  */
7616                                 if (NONEXISTENT(header) ||
7617                                     (now != 0 && now > header->rdh_ttl))
7618                                         header = NULL;
7619                                 break;
7620                         } else
7621                                 header = header->down;
7622                 } while (header != NULL);
7623                 if (header != NULL)
7624                         break;
7625         }
7626
7627         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7628                     isc_rwlocktype_read);
7629
7630         rbtiterator->current = header;
7631
7632         if (header == NULL)
7633                 return (ISC_R_NOMORE);
7634
7635         return (ISC_R_SUCCESS);
7636 }
7637
7638 static isc_result_t
7639 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
7640         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7641         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7642         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7643         rbtdb_version_t *rbtversion = rbtiterator->common.version;
7644         rdatasetheader_t *header, *top_next;
7645         rbtdb_serial_t serial;
7646         isc_stdtime_t now;
7647         rbtdb_rdatatype_t type, negtype;
7648         dns_rdatatype_t rdtype, covers;
7649
7650         header = rbtiterator->current;
7651         if (header == NULL)
7652                 return (ISC_R_NOMORE);
7653
7654         if (IS_CACHE(rbtdb)) {
7655                 serial = 1;
7656                 now = rbtiterator->common.now;
7657         } else {
7658                 serial = rbtversion->serial;
7659                 now = 0;
7660         }
7661
7662         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7663                   isc_rwlocktype_read);
7664
7665         type = header->type;
7666         rdtype = RBTDB_RDATATYPE_BASE(header->type);
7667         if (NEGATIVE(header)) {
7668                 covers = RBTDB_RDATATYPE_EXT(header->type);
7669                 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
7670         } else
7671                 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
7672         for (header = header->next; header != NULL; header = top_next) {
7673                 top_next = header->next;
7674                 /*
7675                  * If not walking back up the down list.
7676                  */
7677                 if (header->type != type && header->type != negtype) {
7678                         do {
7679                                 if (header->serial <= serial &&
7680                                     !IGNORE(header)) {
7681                                         /*
7682                                          * Is this a "this rdataset doesn't
7683                                          * exist" record?
7684                                          *
7685                                          * Note: unlike everywhere else, we
7686                                          * check for now > header->ttl instead
7687                                          * of now >= header->ttl.  This allows
7688                                          * ANY and RRSIG queries for 0 TTL
7689                                          * rdatasets to work.
7690                                          */
7691                                         if ((header->attributes &
7692                                              RDATASET_ATTR_NONEXISTENT) != 0 ||
7693                                             (now != 0 && now > header->rdh_ttl))
7694                                                 header = NULL;
7695                                         break;
7696                                 } else
7697                                         header = header->down;
7698                         } while (header != NULL);
7699                         if (header != NULL)
7700                                 break;
7701                 }
7702         }
7703
7704         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7705                     isc_rwlocktype_read);
7706
7707         rbtiterator->current = header;
7708
7709         if (header == NULL)
7710                 return (ISC_R_NOMORE);
7711
7712         return (ISC_R_SUCCESS);
7713 }
7714
7715 static void
7716 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
7717         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7718         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7719         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7720         rdatasetheader_t *header;
7721
7722         header = rbtiterator->current;
7723         REQUIRE(header != NULL);
7724
7725         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7726                   isc_rwlocktype_read);
7727
7728         bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
7729                       rdataset);
7730
7731         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7732                     isc_rwlocktype_read);
7733 }
7734
7735
7736 /*
7737  * Database Iterator Methods
7738  */
7739
7740 static inline void
7741 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7742         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7743         dns_rbtnode_t *node = rbtdbiter->node;
7744
7745         if (node == NULL)
7746                 return;
7747
7748         INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
7749         reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
7750 }
7751
7752 static inline void
7753 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7754         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7755         dns_rbtnode_t *node = rbtdbiter->node;
7756         nodelock_t *lock;
7757
7758         if (node == NULL)
7759                 return;
7760
7761         lock = &rbtdb->node_locks[node->locknum].lock;
7762         NODE_LOCK(lock, isc_rwlocktype_read);
7763         decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
7764                             rbtdbiter->tree_locked, ISC_FALSE);
7765         NODE_UNLOCK(lock, isc_rwlocktype_read);
7766
7767         rbtdbiter->node = NULL;
7768 }
7769
7770 static void
7771 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
7772         dns_rbtnode_t *node;
7773         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7774         isc_boolean_t was_read_locked = ISC_FALSE;
7775         nodelock_t *lock;
7776         int i;
7777
7778         if (rbtdbiter->delete != 0) {
7779                 /*
7780                  * Note that "%d node of %d in tree" can report things like
7781                  * "flush_deletions: 59 nodes of 41 in tree".  This means
7782                  * That some nodes appear on the deletions list more than
7783                  * once.  Only the last occurence will actually be deleted.
7784                  */
7785                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
7786                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
7787                               "flush_deletions: %d nodes of %d in tree",
7788                               rbtdbiter->delete,
7789                               dns_rbt_nodecount(rbtdb->tree));
7790
7791                 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7792                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7793                         was_read_locked = ISC_TRUE;
7794                 }
7795                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7796                 rbtdbiter->tree_locked = isc_rwlocktype_write;
7797
7798                 for (i = 0; i < rbtdbiter->delete; i++) {
7799                         node = rbtdbiter->deletions[i];
7800                         lock = &rbtdb->node_locks[node->locknum].lock;
7801
7802                         NODE_LOCK(lock, isc_rwlocktype_read);
7803                         decrement_reference(rbtdb, node, 0,
7804                                             isc_rwlocktype_read,
7805                                             rbtdbiter->tree_locked, ISC_FALSE);
7806                         NODE_UNLOCK(lock, isc_rwlocktype_read);
7807                 }
7808
7809                 rbtdbiter->delete = 0;
7810
7811                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7812                 if (was_read_locked) {
7813                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7814                         rbtdbiter->tree_locked = isc_rwlocktype_read;
7815
7816                 } else {
7817                         rbtdbiter->tree_locked = isc_rwlocktype_none;
7818                 }
7819         }
7820 }
7821
7822 static inline void
7823 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
7824         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7825
7826         REQUIRE(rbtdbiter->paused);
7827         REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
7828
7829         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7830         rbtdbiter->tree_locked = isc_rwlocktype_read;
7831
7832         rbtdbiter->paused = ISC_FALSE;
7833 }
7834
7835 static void
7836 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
7837         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
7838         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7839         dns_db_t *db = NULL;
7840
7841         if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7842                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7843                 rbtdbiter->tree_locked = isc_rwlocktype_none;
7844         } else
7845                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
7846
7847         dereference_iter_node(rbtdbiter);
7848
7849         flush_deletions(rbtdbiter);
7850
7851         dns_db_attach(rbtdbiter->common.db, &db);
7852         dns_db_detach(&rbtdbiter->common.db);
7853
7854         dns_rbtnodechain_reset(&rbtdbiter->chain);
7855         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7856         isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
7857         dns_db_detach(&db);
7858
7859         *iteratorp = NULL;
7860 }
7861
7862 static isc_result_t
7863 dbiterator_first(dns_dbiterator_t *iterator) {
7864         isc_result_t result;
7865         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7866         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7867         dns_name_t *name, *origin;
7868
7869         if (rbtdbiter->result != ISC_R_SUCCESS &&
7870             rbtdbiter->result != ISC_R_NOMORE)
7871                 return (rbtdbiter->result);
7872
7873         if (rbtdbiter->paused)
7874                 resume_iteration(rbtdbiter);
7875
7876         dereference_iter_node(rbtdbiter);
7877
7878         name = dns_fixedname_name(&rbtdbiter->name);
7879         origin = dns_fixedname_name(&rbtdbiter->origin);
7880         dns_rbtnodechain_reset(&rbtdbiter->chain);
7881         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7882
7883         if (rbtdbiter->nsec3only) {
7884                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7885                 result = dns_rbtnodechain_first(rbtdbiter->current,
7886                                                 rbtdb->nsec3, name, origin);
7887         } else {
7888                 rbtdbiter->current = &rbtdbiter->chain;
7889                 result = dns_rbtnodechain_first(rbtdbiter->current,
7890                                                 rbtdb->tree, name, origin);
7891                 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
7892                         rbtdbiter->current = &rbtdbiter->nsec3chain;
7893                         result = dns_rbtnodechain_first(rbtdbiter->current,
7894                                                         rbtdb->nsec3, name,
7895                                                         origin);
7896                 }
7897         }
7898         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7899                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7900                                                   NULL, &rbtdbiter->node);
7901                 if (result == ISC_R_SUCCESS) {
7902                         rbtdbiter->new_origin = ISC_TRUE;
7903                         reference_iter_node(rbtdbiter);
7904                 }
7905         } else {
7906                 INSIST(result == ISC_R_NOTFOUND);
7907                 result = ISC_R_NOMORE; /* The tree is empty. */
7908         }
7909
7910         rbtdbiter->result = result;
7911
7912         return (result);
7913 }
7914
7915 static isc_result_t
7916 dbiterator_last(dns_dbiterator_t *iterator) {
7917         isc_result_t result;
7918         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7919         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7920         dns_name_t *name, *origin;
7921
7922         if (rbtdbiter->result != ISC_R_SUCCESS &&
7923             rbtdbiter->result != ISC_R_NOMORE)
7924                 return (rbtdbiter->result);
7925
7926         if (rbtdbiter->paused)
7927                 resume_iteration(rbtdbiter);
7928
7929         dereference_iter_node(rbtdbiter);
7930
7931         name = dns_fixedname_name(&rbtdbiter->name);
7932         origin = dns_fixedname_name(&rbtdbiter->origin);
7933         dns_rbtnodechain_reset(&rbtdbiter->chain);
7934         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7935
7936         result = ISC_R_NOTFOUND;
7937         if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
7938                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7939                 result = dns_rbtnodechain_last(rbtdbiter->current,
7940                                                rbtdb->nsec3, name, origin);
7941         }
7942         if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
7943                 rbtdbiter->current = &rbtdbiter->chain;
7944                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7945                                                name, origin);
7946         }
7947         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7948                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7949                                                   NULL, &rbtdbiter->node);
7950                 if (result == ISC_R_SUCCESS) {
7951                         rbtdbiter->new_origin = ISC_TRUE;
7952                         reference_iter_node(rbtdbiter);
7953                 }
7954         } else {
7955                 INSIST(result == ISC_R_NOTFOUND);
7956                 result = ISC_R_NOMORE; /* The tree is empty. */
7957         }
7958
7959         rbtdbiter->result = result;
7960
7961         return (result);
7962 }
7963
7964 static isc_result_t
7965 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
7966         isc_result_t result;
7967         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7968         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7969         dns_name_t *iname, *origin;
7970
7971         if (rbtdbiter->result != ISC_R_SUCCESS &&
7972             rbtdbiter->result != ISC_R_NOTFOUND &&
7973             rbtdbiter->result != ISC_R_NOMORE)
7974                 return (rbtdbiter->result);
7975
7976         if (rbtdbiter->paused)
7977                 resume_iteration(rbtdbiter);
7978
7979         dereference_iter_node(rbtdbiter);
7980
7981         iname = dns_fixedname_name(&rbtdbiter->name);
7982         origin = dns_fixedname_name(&rbtdbiter->origin);
7983         dns_rbtnodechain_reset(&rbtdbiter->chain);
7984         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7985
7986         if (rbtdbiter->nsec3only) {
7987                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7988                 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7989                                           &rbtdbiter->node,
7990                                           rbtdbiter->current,
7991                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7992         } else if (rbtdbiter->nonsec3) {
7993                 rbtdbiter->current = &rbtdbiter->chain;
7994                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7995                                           &rbtdbiter->node,
7996                                           rbtdbiter->current,
7997                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7998         } else {
7999                 /*
8000                  * Stay on main chain if not found on either chain.
8001                  */
8002                 rbtdbiter->current = &rbtdbiter->chain;
8003                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8004                                           &rbtdbiter->node,
8005                                           rbtdbiter->current,
8006                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8007                 if (result == DNS_R_PARTIALMATCH) {
8008                         dns_rbtnode_t *node = NULL;
8009                         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8010                                                   &node, &rbtdbiter->nsec3chain,
8011                                                   DNS_RBTFIND_EMPTYDATA,
8012                                                   NULL, NULL);
8013                         if (result == ISC_R_SUCCESS) {
8014                                 rbtdbiter->node = node;
8015                                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8016                         }
8017                 }
8018         }
8019
8020 #if 1
8021         if (result == ISC_R_SUCCESS) {
8022                 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
8023                                                   origin, NULL);
8024                 if (result == ISC_R_SUCCESS) {
8025                         rbtdbiter->new_origin = ISC_TRUE;
8026                         reference_iter_node(rbtdbiter);
8027                 }
8028         } else if (result == DNS_R_PARTIALMATCH) {
8029                 result = ISC_R_NOTFOUND;
8030                 rbtdbiter->node = NULL;
8031         }
8032
8033         rbtdbiter->result = result;
8034 #else
8035         if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
8036                 isc_result_t tresult;
8037                 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
8038                                                    origin, NULL);
8039                 if (tresult == ISC_R_SUCCESS) {
8040                         rbtdbiter->new_origin = ISC_TRUE;
8041                         reference_iter_node(rbtdbiter);
8042                 } else {
8043                         result = tresult;
8044                         rbtdbiter->node = NULL;
8045                 }
8046         } else
8047                 rbtdbiter->node = NULL;
8048
8049         rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
8050                             ISC_R_SUCCESS : result;
8051 #endif
8052
8053         return (result);
8054 }
8055
8056 static isc_result_t
8057 dbiterator_prev(dns_dbiterator_t *iterator) {
8058         isc_result_t result;
8059         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8060         dns_name_t *name, *origin;
8061         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8062
8063         REQUIRE(rbtdbiter->node != NULL);
8064
8065         if (rbtdbiter->result != ISC_R_SUCCESS)
8066                 return (rbtdbiter->result);
8067
8068         if (rbtdbiter->paused)
8069                 resume_iteration(rbtdbiter);
8070
8071         name = dns_fixedname_name(&rbtdbiter->name);
8072         origin = dns_fixedname_name(&rbtdbiter->origin);
8073         result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
8074         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8075             !rbtdbiter->nonsec3 &&
8076             &rbtdbiter->nsec3chain == rbtdbiter->current) {
8077                 rbtdbiter->current = &rbtdbiter->chain;
8078                 dns_rbtnodechain_reset(rbtdbiter->current);
8079                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8080                                                name, origin);
8081                 if (result == ISC_R_NOTFOUND)
8082                         result = ISC_R_NOMORE;
8083         }
8084
8085         dereference_iter_node(rbtdbiter);
8086
8087         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8088                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8089                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8090                                                   NULL, &rbtdbiter->node);
8091         }
8092
8093         if (result == ISC_R_SUCCESS)
8094                 reference_iter_node(rbtdbiter);
8095
8096         rbtdbiter->result = result;
8097
8098         return (result);
8099 }
8100
8101 static isc_result_t
8102 dbiterator_next(dns_dbiterator_t *iterator) {
8103         isc_result_t result;
8104         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8105         dns_name_t *name, *origin;
8106         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8107
8108         REQUIRE(rbtdbiter->node != NULL);
8109
8110         if (rbtdbiter->result != ISC_R_SUCCESS)
8111                 return (rbtdbiter->result);
8112
8113         if (rbtdbiter->paused)
8114                 resume_iteration(rbtdbiter);
8115
8116         name = dns_fixedname_name(&rbtdbiter->name);
8117         origin = dns_fixedname_name(&rbtdbiter->origin);
8118         result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
8119         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8120             !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
8121                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8122                 dns_rbtnodechain_reset(rbtdbiter->current);
8123                 result = dns_rbtnodechain_first(rbtdbiter->current,
8124                                                 rbtdb->nsec3, name, origin);
8125                 if (result == ISC_R_NOTFOUND)
8126                         result = ISC_R_NOMORE;
8127         }
8128
8129         dereference_iter_node(rbtdbiter);
8130
8131         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8132                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8133                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8134                                                   NULL, &rbtdbiter->node);
8135         }
8136         if (result == ISC_R_SUCCESS)
8137                 reference_iter_node(rbtdbiter);
8138
8139         rbtdbiter->result = result;
8140
8141         return (result);
8142 }
8143
8144 static isc_result_t
8145 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
8146                    dns_name_t *name)
8147 {
8148         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8149         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8150         dns_rbtnode_t *node = rbtdbiter->node;
8151         isc_result_t result;
8152         dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8153         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8154
8155         REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8156         REQUIRE(rbtdbiter->node != NULL);
8157
8158         if (rbtdbiter->paused)
8159                 resume_iteration(rbtdbiter);
8160
8161         if (name != NULL) {
8162                 if (rbtdbiter->common.relative_names)
8163                         origin = NULL;
8164                 result = dns_name_concatenate(nodename, origin, name, NULL);
8165                 if (result != ISC_R_SUCCESS)
8166                         return (result);
8167                 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8168                         result = DNS_R_NEWORIGIN;
8169         } else
8170                 result = ISC_R_SUCCESS;
8171
8172         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8173         new_reference(rbtdb, node);
8174         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8175
8176         *nodep = rbtdbiter->node;
8177
8178         if (iterator->cleaning && result == ISC_R_SUCCESS) {
8179                 isc_result_t expire_result;
8180
8181                 /*
8182                  * If the deletion array is full, flush it before trying
8183                  * to expire the current node.  The current node can't
8184                  * fully deleted while the iteration cursor is still on it.
8185                  */
8186                 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8187                         flush_deletions(rbtdbiter);
8188
8189                 expire_result = expirenode(iterator->db, *nodep, 0);
8190
8191                 /*
8192                  * expirenode() currently always returns success.
8193                  */
8194                 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8195                         unsigned int refs;
8196
8197                         rbtdbiter->deletions[rbtdbiter->delete++] = node;
8198                         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8199                         dns_rbtnode_refincrement(node, &refs);
8200                         INSIST(refs != 0);
8201                         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8202                 }
8203         }
8204
8205         return (result);
8206 }
8207
8208 static isc_result_t
8209 dbiterator_pause(dns_dbiterator_t *iterator) {
8210         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8211         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8212
8213         if (rbtdbiter->result != ISC_R_SUCCESS &&
8214             rbtdbiter->result != ISC_R_NOMORE)
8215                 return (rbtdbiter->result);
8216
8217         if (rbtdbiter->paused)
8218                 return (ISC_R_SUCCESS);
8219
8220         rbtdbiter->paused = ISC_TRUE;
8221
8222         if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8223                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8224                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8225                 rbtdbiter->tree_locked = isc_rwlocktype_none;
8226         }
8227
8228         flush_deletions(rbtdbiter);
8229
8230         return (ISC_R_SUCCESS);
8231 }
8232
8233 static isc_result_t
8234 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8235         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8236         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8237
8238         if (rbtdbiter->result != ISC_R_SUCCESS)
8239                 return (rbtdbiter->result);
8240
8241         return (dns_name_copy(origin, name, NULL));
8242 }
8243
8244 /*%
8245  * Additional cache routines.
8246  */
8247 static isc_result_t
8248 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8249                        dns_rdatatype_t qtype, dns_acache_t *acache,
8250                        dns_zone_t **zonep, dns_db_t **dbp,
8251                        dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8252                        dns_name_t *fname, dns_message_t *msg,
8253                        isc_stdtime_t now)
8254 {
8255         dns_rbtdb_t *rbtdb = rdataset->private1;
8256         dns_rbtnode_t *rbtnode = rdataset->private2;
8257         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8258         unsigned int current_count = rdataset->privateuint4;
8259         unsigned int count;
8260         rdatasetheader_t *header;
8261         nodelock_t *nodelock;
8262         unsigned int total_count;
8263         acachectl_t *acarray;
8264         dns_acacheentry_t *entry;
8265         isc_result_t result;
8266
8267         UNUSED(qtype); /* we do not use this value at least for now */
8268         UNUSED(acache);
8269
8270         header = (struct rdatasetheader *)(raw - sizeof(*header));
8271
8272         total_count = raw[0] * 256 + raw[1];
8273         INSIST(total_count > current_count);
8274         count = total_count - current_count - 1;
8275
8276         acarray = NULL;
8277
8278         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8279         NODE_LOCK(nodelock, isc_rwlocktype_read);
8280
8281         switch (type) {
8282         case dns_rdatasetadditional_fromauth:
8283                 acarray = header->additional_auth;
8284                 break;
8285         case dns_rdatasetadditional_fromcache:
8286                 acarray = NULL;
8287                 break;
8288         case dns_rdatasetadditional_fromglue:
8289                 acarray = header->additional_glue;
8290                 break;
8291         default:
8292                 INSIST(0);
8293         }
8294
8295         if (acarray == NULL) {
8296                 if (type != dns_rdatasetadditional_fromcache)
8297                         dns_acache_countquerymiss(acache);
8298                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8299                 return (ISC_R_NOTFOUND);
8300         }
8301
8302         if (acarray[count].entry == NULL) {
8303                 dns_acache_countquerymiss(acache);
8304                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8305                 return (ISC_R_NOTFOUND);
8306         }
8307
8308         entry = NULL;
8309         dns_acache_attachentry(acarray[count].entry, &entry);
8310
8311         NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8312
8313         result = dns_acache_getentry(entry, zonep, dbp, versionp,
8314                                      nodep, fname, msg, now);
8315
8316         dns_acache_detachentry(&entry);
8317
8318         return (result);
8319 }
8320
8321 static void
8322 acache_callback(dns_acacheentry_t *entry, void **arg) {
8323         dns_rbtdb_t *rbtdb;
8324         dns_rbtnode_t *rbtnode;
8325         nodelock_t *nodelock;
8326         acachectl_t *acarray = NULL;
8327         acache_cbarg_t *cbarg;
8328         unsigned int count;
8329
8330         REQUIRE(arg != NULL);
8331         cbarg = *arg;
8332
8333         /*
8334          * The caller must hold the entry lock.
8335          */
8336
8337         rbtdb = (dns_rbtdb_t *)cbarg->db;
8338         rbtnode = (dns_rbtnode_t *)cbarg->node;
8339
8340         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8341         NODE_LOCK(nodelock, isc_rwlocktype_write);
8342
8343         switch (cbarg->type) {
8344         case dns_rdatasetadditional_fromauth:
8345                 acarray = cbarg->header->additional_auth;
8346                 break;
8347         case dns_rdatasetadditional_fromglue:
8348                 acarray = cbarg->header->additional_glue;
8349                 break;
8350         default:
8351                 INSIST(0);
8352         }
8353
8354         count = cbarg->count;
8355         if (acarray != NULL && acarray[count].entry == entry) {
8356                 acarray[count].entry = NULL;
8357                 INSIST(acarray[count].cbarg == cbarg);
8358                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8359                 acarray[count].cbarg = NULL;
8360         } else
8361                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8362
8363         dns_acache_detachentry(&entry);
8364
8365         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8366
8367         dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8368         dns_db_detach((dns_db_t **)(void*)&rbtdb);
8369
8370         *arg = NULL;
8371 }
8372
8373 static void
8374 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
8375                       acache_cbarg_t **cbargp)
8376 {
8377         acache_cbarg_t *cbarg;
8378
8379         REQUIRE(mctx != NULL);
8380         REQUIRE(entry != NULL);
8381         REQUIRE(cbargp != NULL && *cbargp != NULL);
8382
8383         cbarg = *cbargp;
8384
8385         dns_acache_cancelentry(entry);
8386         dns_db_detachnode(cbarg->db, &cbarg->node);
8387         dns_db_detach(&cbarg->db);
8388
8389         isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
8390
8391         *cbargp = NULL;
8392 }
8393
8394 static isc_result_t
8395 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8396                        dns_rdatatype_t qtype, dns_acache_t *acache,
8397                        dns_zone_t *zone, dns_db_t *db,
8398                        dns_dbversion_t *version, dns_dbnode_t *node,
8399                        dns_name_t *fname)
8400 {
8401         dns_rbtdb_t *rbtdb = rdataset->private1;
8402         dns_rbtnode_t *rbtnode = rdataset->private2;
8403         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8404         unsigned int current_count = rdataset->privateuint4;
8405         rdatasetheader_t *header;
8406         unsigned int total_count, count;
8407         nodelock_t *nodelock;
8408         isc_result_t result;
8409         acachectl_t *acarray;
8410         dns_acacheentry_t *newentry, *oldentry = NULL;
8411         acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8412
8413         UNUSED(qtype);
8414
8415         if (type == dns_rdatasetadditional_fromcache)
8416                 return (ISC_R_SUCCESS);
8417
8418         header = (struct rdatasetheader *)(raw - sizeof(*header));
8419
8420         total_count = raw[0] * 256 + raw[1];
8421         INSIST(total_count > current_count);
8422         count = total_count - current_count - 1; /* should be private data */
8423
8424         newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8425         if (newcbarg == NULL)
8426                 return (ISC_R_NOMEMORY);
8427         newcbarg->type = type;
8428         newcbarg->count = count;
8429         newcbarg->header = header;
8430         newcbarg->db = NULL;
8431         dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8432         newcbarg->node = NULL;
8433         dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8434                           &newcbarg->node);
8435         newentry = NULL;
8436         result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8437                                         acache_callback, newcbarg, &newentry);
8438         if (result != ISC_R_SUCCESS)
8439                 goto fail;
8440         /* Set cache data in the new entry. */
8441         result = dns_acache_setentry(acache, newentry, zone, db,
8442                                      version, node, fname);
8443         if (result != ISC_R_SUCCESS)
8444                 goto fail;
8445
8446         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8447         NODE_LOCK(nodelock, isc_rwlocktype_write);
8448
8449         acarray = NULL;
8450         switch (type) {
8451         case dns_rdatasetadditional_fromauth:
8452                 acarray = header->additional_auth;
8453                 break;
8454         case dns_rdatasetadditional_fromglue:
8455                 acarray = header->additional_glue;
8456                 break;
8457         default:
8458                 INSIST(0);
8459         }
8460
8461         if (acarray == NULL) {
8462                 unsigned int i;
8463
8464                 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
8465                                       sizeof(acachectl_t));
8466
8467                 if (acarray == NULL) {
8468                         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8469                         goto fail;
8470                 }
8471
8472                 for (i = 0; i < total_count; i++) {
8473                         acarray[i].entry = NULL;
8474                         acarray[i].cbarg = NULL;
8475                 }
8476         }
8477         switch (type) {
8478         case dns_rdatasetadditional_fromauth:
8479                 header->additional_auth = acarray;
8480                 break;
8481         case dns_rdatasetadditional_fromglue:
8482                 header->additional_glue = acarray;
8483                 break;
8484         default:
8485                 INSIST(0);
8486         }
8487
8488         if (acarray[count].entry != NULL) {
8489                 /*
8490                  * Swap the entry.  Delay cleaning-up the old entry since
8491                  * it would require a node lock.
8492                  */
8493                 oldentry = acarray[count].entry;
8494                 INSIST(acarray[count].cbarg != NULL);
8495                 oldcbarg = acarray[count].cbarg;
8496         }
8497         acarray[count].entry = newentry;
8498         acarray[count].cbarg = newcbarg;
8499
8500         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8501
8502         if (oldentry != NULL) {
8503                 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
8504                 dns_acache_detachentry(&oldentry);
8505         }
8506
8507         return (ISC_R_SUCCESS);
8508
8509  fail:
8510         if (newcbarg != NULL) {
8511                 if (newentry != NULL) {
8512                         acache_cancelentry(rbtdb->common.mctx, newentry,
8513                                            &newcbarg);
8514                         dns_acache_detachentry(&newentry);
8515                 } else {
8516                         dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
8517                         dns_db_detach(&newcbarg->db);
8518                         isc_mem_put(rbtdb->common.mctx, newcbarg,
8519                             sizeof(*newcbarg));
8520                 }
8521         }
8522
8523         return (result);
8524 }
8525
8526 static isc_result_t
8527 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
8528                        dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
8529 {
8530         dns_rbtdb_t *rbtdb = rdataset->private1;
8531         dns_rbtnode_t *rbtnode = rdataset->private2;
8532         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8533         unsigned int current_count = rdataset->privateuint4;
8534         rdatasetheader_t *header;
8535         nodelock_t *nodelock;
8536         unsigned int total_count, count;
8537         acachectl_t *acarray;
8538         dns_acacheentry_t *entry;
8539         acache_cbarg_t *cbarg;
8540
8541         UNUSED(qtype);          /* we do not use this value at least for now */
8542         UNUSED(acache);
8543
8544         if (type == dns_rdatasetadditional_fromcache)
8545                 return (ISC_R_SUCCESS);
8546
8547         header = (struct rdatasetheader *)(raw - sizeof(*header));
8548
8549         total_count = raw[0] * 256 + raw[1];
8550         INSIST(total_count > current_count);
8551         count = total_count - current_count - 1;
8552
8553         acarray = NULL;
8554         entry = NULL;
8555
8556         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8557         NODE_LOCK(nodelock, isc_rwlocktype_write);
8558
8559         switch (type) {
8560         case dns_rdatasetadditional_fromauth:
8561                 acarray = header->additional_auth;
8562                 break;
8563         case dns_rdatasetadditional_fromglue:
8564                 acarray = header->additional_glue;
8565                 break;
8566         default:
8567                 INSIST(0);
8568         }
8569
8570         if (acarray == NULL) {
8571                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8572                 return (ISC_R_NOTFOUND);
8573         }
8574
8575         entry = acarray[count].entry;
8576         if (entry == NULL) {
8577                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8578                 return (ISC_R_NOTFOUND);
8579         }
8580
8581         acarray[count].entry = NULL;
8582         cbarg = acarray[count].cbarg;
8583         acarray[count].cbarg = NULL;
8584
8585         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8586
8587         if (entry != NULL) {
8588                 if (cbarg != NULL)
8589                         acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
8590                 dns_acache_detachentry(&entry);
8591         }
8592
8593         return (ISC_R_SUCCESS);
8594 }
8595
8596 /*%
8597  * Routines for LRU-based cache management.
8598  */
8599
8600 /*%
8601  * See if a given cache entry that is being reused needs to be updated
8602  * in the LRU-list.  From the LRU management point of view, this function is
8603  * expected to return true for almost all cases.  When used with threads,
8604  * however, this may cause a non-negligible performance penalty because a
8605  * writer lock will have to be acquired before updating the list.
8606  * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
8607  * function returns true if the entry has not been updated for some period of
8608  * time.  We differentiate the NS or glue address case and the others since
8609  * experiments have shown that the former tends to be accessed relatively
8610  * infrequently and the cost of cache miss is higher (e.g., a missing NS records
8611  * may cause external queries at a higher level zone, involving more
8612  * transactions).
8613  *
8614  * Caller must hold the node (read or write) lock.
8615  */
8616 static inline isc_boolean_t
8617 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
8618         if ((header->attributes &
8619              (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
8620                 return (ISC_FALSE);
8621
8622 #if DNS_RBTDB_LIMITLRUUPDATE
8623         if (header->type == dns_rdatatype_ns ||
8624             (header->trust == dns_trust_glue &&
8625              (header->type == dns_rdatatype_a ||
8626               header->type == dns_rdatatype_aaaa))) {
8627                 /*
8628                  * Glue records are updated if at least 60 seconds have passed
8629                  * since the previous update time.
8630                  */
8631                 return (header->last_used + 60 <= now);
8632         }
8633
8634         /* Other records are updated if 5 minutes have passed. */
8635         return (header->last_used + 300 <= now);
8636 #else
8637         UNUSED(now);
8638
8639         return (ISC_TRUE);
8640 #endif
8641 }
8642
8643 /*%
8644  * Update the timestamp of a given cache entry and move it to the head
8645  * of the corresponding LRU list.
8646  *
8647  * Caller must hold the node (write) lock.
8648  *
8649  * Note that the we do NOT touch the heap here, as the TTL has not changed.
8650  */
8651 static void
8652 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8653               isc_stdtime_t now)
8654 {
8655         INSIST(IS_CACHE(rbtdb));
8656
8657         /* To be checked: can we really assume this? XXXMLG */
8658         INSIST(ISC_LINK_LINKED(header, link));
8659
8660         ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
8661         header->last_used = now;
8662         ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
8663 }
8664
8665 /*%
8666  * Purge some expired and/or stale (i.e. unused for some period) cache entries
8667  * under an overmem condition.  To recover from this condition quickly, up to
8668  * 2 entries will be purged.  This process is triggered while adding a new
8669  * entry, and we specifically avoid purging entries in the same LRU bucket as
8670  * the one to which the new entry will belong.  Otherwise, we might purge
8671  * entries of the same name of different RR types while adding RRsets from a
8672  * single response (consider the case where we're adding A and AAAA glue records
8673  * of the same NS name).
8674  */
8675 static void
8676 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
8677               isc_stdtime_t now, isc_boolean_t tree_locked)
8678 {
8679         rdatasetheader_t *header, *header_prev;
8680         unsigned int locknum;
8681         int purgecount = 2;
8682
8683         for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
8684              locknum != locknum_start && purgecount > 0;
8685              locknum = (locknum + 1) % rbtdb->node_lock_count) {
8686                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
8687                           isc_rwlocktype_write);
8688
8689                 header = isc_heap_element(rbtdb->heaps[locknum], 1);
8690                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
8691                         expire_header(rbtdb, header, tree_locked);
8692                         purgecount--;
8693                 }
8694
8695                 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
8696                      header != NULL && purgecount > 0;
8697                      header = header_prev) {
8698                         header_prev = ISC_LIST_PREV(header, link);
8699                         /*
8700                          * Unlink the entry at this point to avoid checking it
8701                          * again even if it's currently used someone else and
8702                          * cannot be purged at this moment.  This entry won't be
8703                          * referenced any more (so unlinking is safe) since the
8704                          * TTL was reset to 0.
8705                          */
8706                         ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
8707                                         link);
8708                         expire_header(rbtdb, header, tree_locked);
8709                         purgecount--;
8710                 }
8711
8712                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
8713                                     isc_rwlocktype_write);
8714         }
8715 }
8716
8717 static void
8718 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8719               isc_boolean_t tree_locked)
8720 {
8721         set_ttl(rbtdb, header, 0);
8722         header->attributes |= RDATASET_ATTR_STALE;
8723         header->node->dirty = 1;
8724
8725         /*
8726          * Caller must hold the node (write) lock.
8727          */
8728
8729         if (dns_rbtnode_refcurrent(header->node) == 0) {
8730                 /*
8731                  * If no one else is using the node, we can clean it up now.
8732                  * We first need to gain a new reference to the node to meet a
8733                  * requirement of decrement_reference().
8734                  */
8735                 new_reference(rbtdb, header->node);
8736                 decrement_reference(rbtdb, header->node, 0,
8737                                     isc_rwlocktype_write,
8738                                     tree_locked ? isc_rwlocktype_write :
8739                                     isc_rwlocktype_none, ISC_FALSE);
8740         }
8741 }