]> CyberLeo.Net >> Repos - FreeBSD/releng/9.1.git/blob - contrib/bind9/lib/dns/rbtdb.c
Copy stable/9 to releng/9.1 as part of the 9.1-RELEASE release process.
[FreeBSD/releng/9.1.git] / contrib / bind9 / lib / dns / rbtdb.c
1 /*
2  * Copyright (C) 2004-2012  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id$ */
19
20 /*! \file */
21
22 /*
23  * Principal Author: Bob Halley
24  */
25
26 #include <config.h>
27
28 /* #define inline */
29
30 #include <isc/event.h>
31 #include <isc/heap.h>
32 #include <isc/mem.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
41 #include <isc/task.h>
42 #include <isc/time.h>
43 #include <isc/util.h>
44
45 #include <dns/acache.h>
46 #include <dns/db.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
50 #include <dns/lib.h>
51 #include <dns/log.h>
52 #include <dns/masterdump.h>
53 #include <dns/nsec.h>
54 #include <dns/nsec3.h>
55 #include <dns/rbt.h>
56 #include <dns/rpz.h>
57 #include <dns/rdata.h>
58 #include <dns/rdataset.h>
59 #include <dns/rdatasetiter.h>
60 #include <dns/rdataslab.h>
61 #include <dns/rdatastruct.h>
62 #include <dns/result.h>
63 #include <dns/stats.h>
64 #include <dns/view.h>
65 #include <dns/zone.h>
66 #include <dns/zonekey.h>
67
68 #ifdef DNS_RBTDB_VERSION64
69 #include "rbtdb64.h"
70 #else
71 #include "rbtdb.h"
72 #endif
73
/*
 * Implementation magic number stored in 'common.impmagic'.  A different
 * value ('RBD8' instead of 'RBD4') is used when DNS_RBTDB_VERSION64 is
 * defined.
 */
74 #ifdef DNS_RBTDB_VERSION64
75 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '8')
76 #else
77 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '4')
78 #endif
79
80 /*%
81  * Note that "impmagic" is not the first four bytes of the struct, so
82  * ISC_MAGIC_VALID cannot be used.
 *
 * VALID_RBTDB() is true for a non-NULL pointer whose 'common.impmagic'
 * equals RBTDB_MAGIC.  The argument is evaluated twice.
83  */
84 #define VALID_RBTDB(rbtdb)      ((rbtdb) != NULL && \
85                                  (rbtdb)->common.impmagic == RBTDB_MAGIC)
86
/*
 * rbtdb_serial_t is the serial number type used for versions and rdataset
 * headers: 64 bits wide when DNS_RBTDB_VERSION64 is defined, 32 bits
 * otherwise.
 */
87 #ifdef DNS_RBTDB_VERSION64
88 typedef isc_uint64_t                    rbtdb_serial_t;
89 /*%
90  * Make casting easier in symbolic debuggers by using different names
91  * for the 64 bit version.
92  */
93 #define dns_rbtdb_t dns_rbtdb64_t
94 #define rdatasetheader_t rdatasetheader64_t
95 #define rbtdb_version_t rbtdb_version64_t
96 #else
97 typedef isc_uint32_t                    rbtdb_serial_t;
98 #endif
99
/*
 * 32-bit value carrying two 16-bit rdata types; it is taken apart and
 * assembled with the RBTDB_RDATATYPE_BASE/EXT/VALUE macros.
 */
100 typedef isc_uint32_t                    rbtdb_rdatatype_t;
101
/*
 * An rbtdb_rdatatype_t packs two 16-bit rdata types into one 32-bit value:
 * the "base" type lives in the low 16 bits and the "ext" type in the high
 * 16 bits.
 *
 * RBTDB_RDATATYPE_BASE(type)   extracts the low half.
 * RBTDB_RDATATYPE_EXT(type)    extracts the high half.
 * RBTDB_RDATATYPE_VALUE(b, e)  builds a packed value from base 'b' and
 *                              ext 'e'.
 *
 * In RBTDB_RDATATYPE_VALUE both operands are converted to the unsigned
 * 32-bit rbtdb_rdatatype_t *before* shifting/or-ing.  Shifting a promoted
 * (signed int) 16-bit value left by 16 is undefined once bit 15 is set,
 * i.e. for any ext type >= 0x8000.  'b' is masked so it can never spill
 * into the ext half.
 */
#define RBTDB_RDATATYPE_BASE(type)      ((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)       ((dns_rdatatype_t)((type) >> 16))
#define RBTDB_RDATATYPE_VALUE(b, e) \
        ((rbtdb_rdatatype_t)(((rbtdb_rdatatype_t)(e) << 16) | \
                             ((rbtdb_rdatatype_t)(b) & 0xFFFF)))
105
/*
 * Pre-composed packed type values.  Each is built with
 * RBTDB_RDATATYPE_VALUE(base, ext): the SIG* values have rrsig as the base
 * type and the covered type (nsec, nsec3, ns, cname, dname) as the ext
 * type; NCACHEANY has base 0 and ext 'any'.
 */
106 #define RBTDB_RDATATYPE_SIGNSEC \
107                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
108 #define RBTDB_RDATATYPE_SIGNSEC3 \
109                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
110 #define RBTDB_RDATATYPE_SIGNS \
111                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
112 #define RBTDB_RDATATYPE_SIGCNAME \
113                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
114 #define RBTDB_RDATATYPE_SIGDNAME \
115                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
116 #define RBTDB_RDATATYPE_NCACHEANY \
117                 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
118
119 /*
120  * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is defined.
121  * Using rwlock is effective with regard to lookup performance only when
122  * it is implemented in an efficient way.
123  * Otherwise, it is generally wise to stick to the simple locking since rwlock
124  * would require more memory or can even make lookups slower due to its own
125  * overhead (when it internally calls mutex locks).
126  */
127 #ifdef ISC_RWLOCK_USEATOMIC
128 #define DNS_RBTDB_USERWLOCK 1
129 #else
130 #define DNS_RBTDB_USERWLOCK 0
131 #endif
132
/*
 * Database lock wrappers.  With DNS_RBTDB_USERWLOCK they map onto the
 * rwlock primitives and honour the lock type 't'; otherwise they map onto
 * a plain mutex and 't' is ignored.
 */
133 #if DNS_RBTDB_USERWLOCK
134 #define RBTDB_INITLOCK(l)       isc_rwlock_init((l), 0, 0)
135 #define RBTDB_DESTROYLOCK(l)    isc_rwlock_destroy(l)
136 #define RBTDB_LOCK(l, t)        RWLOCK((l), (t))
137 #define RBTDB_UNLOCK(l, t)      RWUNLOCK((l), (t))
138 #else
139 #define RBTDB_INITLOCK(l)       isc_mutex_init(l)
140 #define RBTDB_DESTROYLOCK(l)    DESTROYLOCK(l)
141 #define RBTDB_LOCK(l, t)        LOCK(l)
142 #define RBTDB_UNLOCK(l, t)      UNLOCK(l)
143 #endif
144
145 /*
146  * Since node locking is sensitive to both performance and memory footprint,
147  * we need some trick here.  If we have both high-performance rwlock and
148  * high performance and small-memory reference counters, we use rwlock for
149  * node lock and isc_refcount for node references.  In this case, we don't have
150  * to protect the access to the counters by locks.
151  * Otherwise, we simply use ordinary mutex lock for node locking, and use
152  * simple integers as reference counters which is protected by the lock.
153  * In most cases, we can simply use wrapper macros such as NODE_LOCK and
154  * NODE_UNLOCK.  In some other cases, however, we need to protect reference
155  * counters first and then protect other parts of a node as read-only data.
156  * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
157  * provided for these special cases.  When we can use the efficient backend
158  * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
159  * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
160  * section including the access to the reference counter.
161  * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
162  * section is also protected by NODE_STRONGLOCK().
163  */
164 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
/*
 * rwlock + isc_refcount build: NODE_LOCK/NODE_UNLOCK and the WEAK variants
 * operate on the node rwlock with lock type 't'; the STRONG variants are
 * no-ops.
 */
165 typedef isc_rwlock_t nodelock_t;
166
167 #define NODE_INITLOCK(l)        isc_rwlock_init((l), 0, 0)
168 #define NODE_DESTROYLOCK(l)     isc_rwlock_destroy(l)
169 #define NODE_LOCK(l, t)         RWLOCK((l), (t))
170 #define NODE_UNLOCK(l, t)       RWUNLOCK((l), (t))
171 #define NODE_TRYUPGRADE(l)      isc_rwlock_tryupgrade(l)
172
173 #define NODE_STRONGLOCK(l)      ((void)0)
174 #define NODE_STRONGUNLOCK(l)    ((void)0)
175 #define NODE_WEAKLOCK(l, t)     NODE_LOCK(l, t)
176 #define NODE_WEAKUNLOCK(l, t)   NODE_UNLOCK(l, t)
177 #define NODE_WEAKDOWNGRADE(l)   isc_rwlock_downgrade(l)
178 #else
/*
 * Mutex build: NODE_LOCK/NODE_UNLOCK and the STRONG variants take and
 * release the node mutex (the lock type 't' is ignored); the WEAK variants
 * are no-ops and NODE_TRYUPGRADE always evaluates to ISC_R_SUCCESS.
 */
179 typedef isc_mutex_t nodelock_t;
180
181 #define NODE_INITLOCK(l)        isc_mutex_init(l)
182 #define NODE_DESTROYLOCK(l)     DESTROYLOCK(l)
183 #define NODE_LOCK(l, t)         LOCK(l)
184 #define NODE_UNLOCK(l, t)       UNLOCK(l)
185 #define NODE_TRYUPGRADE(l)      ISC_R_SUCCESS
186
187 #define NODE_STRONGLOCK(l)      LOCK(l)
188 #define NODE_STRONGUNLOCK(l)    UNLOCK(l)
189 #define NODE_WEAKLOCK(l, t)     ((void)0)
190 #define NODE_WEAKUNLOCK(l, t)   ((void)0)
191 #define NODE_WEAKDOWNGRADE(l)   ((void)0)
192 #endif
193
194 /*%
195  * Whether to rate-limit updating the LRU to avoid possible thread contention.
196  * Our performance measurement has shown the cost is marginal, so it's defined
197  * to be 0 by default either with or without threads.
198  */
199 #ifndef DNS_RBTDB_LIMITLRUUPDATE
200 #define DNS_RBTDB_LIMITLRUUPDATE 0
201 #endif
202
203 /*
204  * Allow clients with a virtual time of up to 5 minutes in the past to see
205  * records that would otherwise have expired.
206  */
207 #define RBTDB_VIRTUAL 300
208
/*
 * Record referenced from an rdataset header through its 'noqname' and
 * 'closest' pointers.
 * NOTE(review): 'neg' and 'negsig' are untyped here; presumably they hold
 * the negative-proof rdataset data and its signature data -- confirm
 * against the code that fills them in.
 */
209 struct noqname {
210         dns_name_t      name;
211         void *          neg;
212         void *          negsig;
213         dns_rdatatype_t type;
214 };
215
216 typedef struct acachectl acachectl_t;
217
/*
 * Header describing one rdataset (one type, one version) stored at a node.
 * Headers for different types are chained through 'next'; older versions of
 * the same rdataset are chained through 'down'.
 */
218 typedef struct rdatasetheader {
219         /*%
220          * Locked by the owning node's lock.
221          */
222         rbtdb_serial_t                  serial;
223         dns_ttl_t                       rdh_ttl;
224         rbtdb_rdatatype_t               type;
225         isc_uint16_t                    attributes;
226         dns_trust_t                     trust;
227         struct noqname                  *noqname;
228         struct noqname                  *closest;
229         /*
230          * We don't use the LIST macros, because the LIST structure has
231          * both head and tail pointers, and is doubly linked.
          * (This remark concerns the 'next' and 'down' chains below.)
232          */
233
234         struct rdatasetheader           *next;
235         /*%<
236          * If this is the top header for an rdataset, 'next' points
237          * to the top header for the next rdataset (i.e., the next type).
238          * Otherwise, it points up to the header whose down pointer points
239          * at this header.
240          */
241
242         struct rdatasetheader           *down;
243         /*%<
244          * Points to the header for the next older version of
245          * this rdataset.
246          */
247
248         isc_uint32_t                    count;
249         /*%<
250          * Monotonically increased every time this rdataset is bound so that
251          * it is used as the base of the starting point in DNS responses
252          * when the "cyclic" rrset-order is required.  Since the ordering
253          * should not be so crucial, no lock is set for the counter for
254          * performance reasons.
255          */
256
257         acachectl_t                     *additional_auth;
258         acachectl_t                     *additional_glue;
259
260         dns_rbtnode_t                   *node;
261         isc_stdtime_t                   last_used;
262         ISC_LINK(struct rdatasetheader) link;
263
264         unsigned int                    heap_index;
265         /*%<
266          * Used for TTL-based cache cleaning.  Written by set_index();
          * set_ttl() skips the heap adjustment when this is 0.
267          */
268         isc_stdtime_t                   resign;
269 } rdatasetheader_t;
270
271 typedef ISC_LIST(rdatasetheader_t)      rdatasetheaderlist_t;
272 typedef ISC_LIST(dns_rbtnode_t)         rbtnodelist_t;
273
/*
 * Bit flags stored in rdatasetheader_t.attributes (a 16-bit field).  Most
 * of them are tested through the EXISTS(), NONEXISTENT(), IGNORE(),
 * RETAIN(), NXDOMAIN(), RESIGN(), OPTOUT() and NEGATIVE() macros.
 */
274 #define RDATASET_ATTR_NONEXISTENT       0x0001
275 #define RDATASET_ATTR_STALE             0x0002
276 #define RDATASET_ATTR_IGNORE            0x0004
277 #define RDATASET_ATTR_RETAIN            0x0008
278 #define RDATASET_ATTR_NXDOMAIN          0x0010
279 #define RDATASET_ATTR_RESIGN            0x0020
280 #define RDATASET_ATTR_STATCOUNT         0x0040
281 #define RDATASET_ATTR_OPTOUT            0x0080
282 #define RDATASET_ATTR_NEGATIVE          0x0100
283
/*
 * Additional-data cache bookkeeping.  An acachectl pairs an acache entry
 * with an acache_cbarg; rdatasetheader_t holds arrays of acachectl through
 * its 'additional_auth' and 'additional_glue' pointers.
 * NOTE(review): acache_cbarg looks like the argument block handed to an
 * acache callback (it names the db, node and header involved) -- confirm
 * against the code that allocates it.
 */
284 typedef struct acache_cbarg {
285         dns_rdatasetadditional_t        type;
286         unsigned int                    count;
287         dns_db_t                        *db;
288         dns_dbnode_t                    *node;
289         rdatasetheader_t                *header;
290 } acache_cbarg_t;
291
292 struct acachectl {
293         dns_acacheentry_t               *entry;
294         acache_cbarg_t                  *cbarg;
295 };
296
297 /*
298  * XXX
299  * When the cache will pre-expire data (due to memory low or other
300  * situations) before the rdataset's TTL has expired, it MUST
301  * respect the RETAIN bit and not expire the data until its TTL is
302  * expired.
303  */
304
305 #undef IGNORE                   /* WIN32 winbase.h defines this. */
306
/*
 * Predicates on a header's 'attributes' bits.  EXISTS() is true when
 * RDATASET_ATTR_NONEXISTENT is clear; every other macro is true when the
 * correspondingly named RDATASET_ATTR_* bit is set.
 */
307 #define EXISTS(header) \
308         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
309 #define NONEXISTENT(header) \
310         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
311 #define IGNORE(header) \
312         (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
313 #define RETAIN(header) \
314         (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
315 #define NXDOMAIN(header) \
316         (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
317 #define RESIGN(header) \
318         (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
319 #define OPTOUT(header) \
320         (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
321 #define NEGATIVE(header) \
322         (((header)->attributes & RDATASET_ATTR_NEGATIVE) != 0)
323
324 #define DEFAULT_NODE_LOCK_COUNT         7       /*%< Should be prime. */
325
326 /*%
327  * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
328  * There is a tradeoff issue about configuring this value: if this is too
329  * small, it may cause heavier contention between threads; if this is too large,
330  * LRU purge algorithm won't work well (entries tend to be purged prematurely).
331  * The default value should work well for most environments, but this can
332  * also be configurable at compilation time via the
333  * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable.  This value must be larger than
334  * 1 due to the assumption of overmem_purge().
335  */
336 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
337 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
338 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
339 #else
340 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
341 #endif
342 #else
343 #define DEFAULT_CACHE_NODE_LOCK_COUNT   16
344 #endif  /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
345
/*
 * One node-lock bucket: the lock itself, a reference counter and an
 * 'exiting' flag.  struct dns_rbtdb holds an array of these in
 * 'node_locks' (node_lock_count entries).
 */
346 typedef struct {
347         nodelock_t                      lock;
348         /* Protected in the refcount routines. */
349         isc_refcount_t                  references;
350         /* Locked by lock. */
351         isc_boolean_t                   exiting;
352 } rbtdb_nodelock_t;
353
/*
 * Entry on a version's 'changed_list' (see rbtdb_version_t): names one
 * node together with a 'dirty' flag.
 * NOTE(review): the exact meaning of 'dirty' is set by code outside this
 * excerpt -- confirm before relying on it.
 */
354 typedef struct rbtdb_changed {
355         dns_rbtnode_t *                 node;
356         isc_boolean_t                   dirty;
357         ISC_LINK(struct rbtdb_changed)  link;
358 } rbtdb_changed_t;
359
360 typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
361
/*
 * Security state recorded per version in rbtdb_version_t.secure.
 */
362 typedef enum {
363         dns_db_insecure,
364         dns_db_partial,
365         dns_db_secure
366 } dns_db_secure_t;
367
368 typedef struct dns_rbtdb dns_rbtdb_t;
369
/*
 * State of one database version.  Versions can be chained through 'link'
 * (struct dns_rbtdb keeps an 'open_versions' list of them) and each carries
 * its own changed/resigned lists, security state and NSEC3 parameters.
 */
370 typedef struct rbtdb_version {
371         /* Not locked */
372         rbtdb_serial_t                  serial;
373         dns_rbtdb_t *                   rbtdb;
374         /*
375          * Protected in the refcount routines.
376          * XXXJT: should we change the lock policy based on the refcount
377          * performance?
378          */
379         isc_refcount_t                  references;
380         /* Locked by database lock. */
381         isc_boolean_t                   writer;
382         isc_boolean_t                   commit_ok;
383         rbtdb_changedlist_t             changed_list;
384         rdatasetheaderlist_t            resigned_list;
385         ISC_LINK(struct rbtdb_version)  link;
386         dns_db_secure_t                 secure;
387         isc_boolean_t                   havensec3;
388         /* NSEC3 parameters */
389         dns_hash_t                      hash;
390         isc_uint8_t                     flags;
391         isc_uint16_t                    iterations;
392         isc_uint8_t                     salt_length;
393         unsigned char                   salt[DNS_NSEC3_SALTSIZE];
394 } rbtdb_version_t;
395
396 typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;
397
/*
 * The red-black-tree database object.  'common' is the first member and
 * functions in this file convert a dns_db_t pointer to a dns_rbtdb_t
 * pointer by casting (see attach()), so it must stay first.  The comments
 * inside the struct state which lock protects each group of members.
 */
398 struct dns_rbtdb {
399         /* Unlocked. */
400         dns_db_t                        common;
401         /* Locks the data in this struct */
402 #if DNS_RBTDB_USERWLOCK
403         isc_rwlock_t                    lock;
404 #else
405         isc_mutex_t                     lock;
406 #endif
407         /* Locks the tree structure (prevents nodes appearing/disappearing) */
408         isc_rwlock_t                    tree_lock;
409         /* Locks for individual tree nodes */
410         unsigned int                    node_lock_count;
411         rbtdb_nodelock_t *              node_locks;
412         dns_rbtnode_t *                 origin_node;
413         dns_stats_t *                   rrsetstats; /* cache DB only */
414         /* Locked by lock. */
415         unsigned int                    active;
416         isc_refcount_t                  references;
417         unsigned int                    attributes;
418         rbtdb_serial_t                  current_serial;
419         rbtdb_serial_t                  least_serial;
420         rbtdb_serial_t                  next_serial;
421         rbtdb_version_t *               current_version;
422         rbtdb_version_t *               future_version;
423         rbtdb_versionlist_t             open_versions;
424         isc_task_t *                    task;
425         dns_dbnode_t                    *soanode;
426         dns_dbnode_t                    *nsnode;
427
428         /*
429          * This is a linked list used to implement the LRU cache.  There will
430          * be node_lock_count linked lists here.  Nodes in bucket 1 will be
431          * placed on the linked list rdatasets[1].
432          */
433         rdatasetheaderlist_t            *rdatasets;
434
435         /*%
436          * Temporary storage for stale cache nodes and dynamically deleted
437          * nodes that await being cleaned up.
438          */
439         rbtnodelist_t                   *deadnodes;
440
441         /*
442          * Heaps.  These are used for TTL based expiry in a cache,
443          * or for zone resigning in a zone DB.  hmctx is the memory
444          * context to use for the heap (which differs from the main
445          * database memory context in the case of a cache).
446          */
447         isc_mem_t *                     hmctx;
448         isc_heap_t                      **heaps;
449
450         /* Locked by tree_lock. */
451         dns_rbt_t *                     tree;
452         dns_rbt_t *                     nsec;
453         dns_rbt_t *                     nsec3;
454         dns_rpz_cidr_t *                rpz_cidr;
455
456         /* Unlocked */
457         unsigned int                    quantum;
458 };
459
460 #define RBTDB_ATTR_LOADED               0x01
461 #define RBTDB_ATTR_LOADING              0x02
462
463 /*%
464  * Search Context
465  */
466 typedef struct {
467         dns_rbtdb_t *           rbtdb;
468         rbtdb_version_t *       rbtversion;
469         rbtdb_serial_t          serial;
470         unsigned int            options;
471         dns_rbtnodechain_t      chain;
472         isc_boolean_t           copy_name;
473         isc_boolean_t           need_cleanup;
474         isc_boolean_t           wild;
475         dns_rbtnode_t *         zonecut;
476         rdatasetheader_t *      zonecut_rdataset;
477         rdatasetheader_t *      zonecut_sigrdataset;
478         dns_fixedname_t         zonecut_name;
479         isc_stdtime_t           now;
480 } rbtdb_search_t;
481
482 /*%
483  * Load Context
484  */
485 typedef struct {
486         dns_rbtdb_t *           rbtdb;
487         isc_stdtime_t           now;
488 } rbtdb_load_t;
489
490 static void rdataset_disassociate(dns_rdataset_t *rdataset);
491 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
492 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
493 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
494 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
495 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
496 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
497                                         dns_name_t *name,
498                                         dns_rdataset_t *neg,
499                                         dns_rdataset_t *negsig);
500 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
501                                         dns_name_t *name,
502                                         dns_rdataset_t *neg,
503                                         dns_rdataset_t *negsig);
504 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
505                                            dns_rdatasetadditional_t type,
506                                            dns_rdatatype_t qtype,
507                                            dns_acache_t *acache,
508                                            dns_zone_t **zonep,
509                                            dns_db_t **dbp,
510                                            dns_dbversion_t **versionp,
511                                            dns_dbnode_t **nodep,
512                                            dns_name_t *fname,
513                                            dns_message_t *msg,
514                                            isc_stdtime_t now);
515 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
516                                            dns_rdatasetadditional_t type,
517                                            dns_rdatatype_t qtype,
518                                            dns_acache_t *acache,
519                                            dns_zone_t *zone,
520                                            dns_db_t *db,
521                                            dns_dbversion_t *version,
522                                            dns_dbnode_t *node,
523                                            dns_name_t *fname);
524 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
525                                            dns_rdataset_t *rdataset,
526                                            dns_rdatasetadditional_t type,
527                                            dns_rdatatype_t qtype);
528 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
529                                               isc_stdtime_t now);
530 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
531                           isc_stdtime_t now);
532 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
533                           isc_boolean_t tree_locked);
534 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
535                           isc_stdtime_t now, isc_boolean_t tree_locked);
536 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
537                                   rdatasetheader_t *newheader);
538 static void prune_tree(isc_task_t *task, isc_event_t *event);
539 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
540 static void rdataset_expire(dns_rdataset_t *rdataset);
541
/*
 * Method table wiring the static rdataset_*() functions of this file into
 * a dns_rdatasetmethods_t.  Two slots are left NULL.
 * NOTE(review): which operations the NULL slots stand for depends on the
 * member order of dns_rdatasetmethods_t, which is declared elsewhere --
 * confirm against that header before adding or reordering entries.
 */
542 static dns_rdatasetmethods_t rdataset_methods = {
543         rdataset_disassociate,
544         rdataset_first,
545         rdataset_next,
546         rdataset_current,
547         rdataset_clone,
548         rdataset_count,
549         NULL,
550         rdataset_getnoqname,
551         NULL,
552         rdataset_getclosest,
553         rdataset_getadditional,
554         rdataset_setadditional,
555         rdataset_putadditional,
556         rdataset_settrust,
557         rdataset_expire
558 };
559
560 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
561 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
562 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
563 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
564                                  dns_rdataset_t *rdataset);
565
/*
 * Method table for rdataset iterators: destroy, first, next and current,
 * implemented by the rdatasetiter_*() functions of this file.
 */
566 static dns_rdatasetitermethods_t rdatasetiter_methods = {
567         rdatasetiter_destroy,
568         rdatasetiter_first,
569         rdatasetiter_next,
570         rdatasetiter_current
571 };
572
/*
 * Rdataset iterator state: the generic iterator ('common', first member)
 * followed by a pointer to the rdataset header the iterator currently
 * refers to.
 */
573 typedef struct rbtdb_rdatasetiter {
574         dns_rdatasetiter_t              common;
575         rdatasetheader_t *              current;
576 } rbtdb_rdatasetiter_t;
577
578 static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
579 static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
580 static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
581 static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
582                                         dns_name_t *name);
583 static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
584 static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
585 static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
586                                            dns_dbnode_t **nodep,
587                                            dns_name_t *name);
588 static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
589 static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
590                                           dns_name_t *name);
591
/*
 * Method table for database iterators, implemented by the dbiterator_*()
 * functions of this file.
 */
592 static dns_dbiteratormethods_t dbiterator_methods = {
593         dbiterator_destroy,
594         dbiterator_first,
595         dbiterator_last,
596         dbiterator_seek,
597         dbiterator_prev,
598         dbiterator_next,
599         dbiterator_current,
600         dbiterator_pause,
601         dbiterator_origin
602 };
603
604 #define DELETION_BATCH_MAX 64
605
606 /*
 * Database iterator state.
 *
607  * If 'paused' is ISC_TRUE, then the tree lock is not being held.
 *
 * 'deletions' can hold up to DELETION_BATCH_MAX node pointers.
 * NOTE(review): 'delete' is presumably the number of entries of
 * 'deletions' currently in use, and 'current' presumably points at either
 * 'chain' or 'nsec3chain' -- confirm against the iterator functions.
608  */
609 typedef struct rbtdb_dbiterator {
610         dns_dbiterator_t                common;
611         isc_boolean_t                   paused;
612         isc_boolean_t                   new_origin;
613         isc_rwlocktype_t                tree_locked;
614         isc_result_t                    result;
615         dns_fixedname_t                 name;
616         dns_fixedname_t                 origin;
617         dns_rbtnodechain_t              chain;
618         dns_rbtnodechain_t              nsec3chain;
619         dns_rbtnodechain_t              *current;
620         dns_rbtnode_t                   *node;
621         dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX];
622         int                             delete;
623         isc_boolean_t                   nsec3only;
624         isc_boolean_t                   nonsec3;
625 } rbtdb_dbiterator_t;
626
627
/*
 * Database kind predicates: true when the DNS_DBATTR_STUB / DNS_DBATTR_CACHE
 * bit is set in 'common.attributes'.
 */
628 #define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
629 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
630
631 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
632                        isc_event_t *event);
633 static void overmem(dns_db_t *db, isc_boolean_t overmem);
634 #ifdef BIND9
635 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version);
636 #endif
637
638 /*%
639  * 'init_count' is used to initialize 'newheader->count' which in turn
640  * is used to determine where in the cycle rrset-order cyclic starts.
641  * We don't lock this as we don't care about simultaneous updates.
642  *
643  * Note:
644  *      Both init_count and header->count can be ISC_UINT32_MAX.
645  *      The count on the returned rdataset however can't be as
646  *      that indicates that the database does not implement cyclic
647  *      processing.
648  */
649 static unsigned int init_count;
650
651 /*
652  * Locking
653  *
654  * If a routine is going to lock more than one lock in this module, then
655  * the locking must be done in the following order:
656  *
657  *      Tree Lock
658  *
659  *      Node Lock       (Only one from the set may be locked at one time by
660  *                       any caller)
661  *
662  *      Database Lock
663  *
664  * Failure to follow this hierarchy can result in deadlock.
665  */
666
667 /*
668  * Deleting Nodes
669  *
670  * For zone databases the node for the origin of the zone MUST NOT be deleted.
671  */
672
673
674 /*
675  * DB Routines
676  */
677
678 static void
679 attach(dns_db_t *source, dns_db_t **targetp) {
680         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
681
682         REQUIRE(VALID_RBTDB(rbtdb));
683
684         isc_refcount_increment(&rbtdb->references, NULL);
685
686         *targetp = source;
687 }
688
689 static void
690 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
691         dns_rbtdb_t *rbtdb = event->ev_arg;
692
693         UNUSED(task);
694
695         free_rbtdb(rbtdb, ISC_TRUE, event);
696 }
697
698 static void
699 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
700                   isc_boolean_t increment)
701 {
702         dns_rdatastatstype_t statattributes = 0;
703         dns_rdatastatstype_t base = 0;
704         dns_rdatastatstype_t type;
705
706         /* At the moment we count statistics only for cache DB */
707         INSIST(IS_CACHE(rbtdb));
708
709         if (NEGATIVE(header)) {
710                 if (NXDOMAIN(header))
711                         statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
712                 else {
713                         statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
714                         base = RBTDB_RDATATYPE_EXT(header->type);
715                 }
716         } else
717                 base = RBTDB_RDATATYPE_BASE(header->type);
718
719         type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
720         if (increment)
721                 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
722         else
723                 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
724 }
725
726 static void
727 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
728         int idx;
729         isc_heap_t *heap;
730         dns_ttl_t oldttl;
731
732         oldttl = header->rdh_ttl;
733         header->rdh_ttl = newttl;
734
735         if (!IS_CACHE(rbtdb))
736                 return;
737
738         /*
739          * It's possible the rbtdb is not a cache.  If this is the case,
740          * we will not have a heap, and we move on.  If we do, though,
741          * we might need to adjust things.
742          */
743         if (header->heap_index == 0 || newttl == oldttl)
744                 return;
745         idx = header->node->locknum;
746         if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
747             return;
748         heap = rbtdb->heaps[idx];
749
750         if (newttl < oldttl)
751                 isc_heap_increased(heap, header->heap_index);
752         else
753                 isc_heap_decreased(heap, header->heap_index);
754 }
755
756 /*%
757  * These functions allow the heap code to rank the priority of each
758  * element.  It returns ISC_TRUE if v1 happens "sooner" than v2.
759  */
760 static isc_boolean_t
761 ttl_sooner(void *v1, void *v2) {
762         rdatasetheader_t *h1 = v1;
763         rdatasetheader_t *h2 = v2;
764
765         if (h1->rdh_ttl < h2->rdh_ttl)
766                 return (ISC_TRUE);
767         return (ISC_FALSE);
768 }
769
770 static isc_boolean_t
771 resign_sooner(void *v1, void *v2) {
772         rdatasetheader_t *h1 = v1;
773         rdatasetheader_t *h2 = v2;
774
775         if (h1->resign < h2->resign)
776                 return (ISC_TRUE);
777         return (ISC_FALSE);
778 }
779
780 /*%
781  * This function sets the heap index into the header.
782  */
783 static void
784 set_index(void *what, unsigned int index) {
785         rdatasetheader_t *h = what;
786
787         h->heap_index = index;
788 }
789
790 /*%
791  * Work out how many nodes can be deleted in the time between two
792  * requests to the nameserver.  Smooth the resulting number and use it
793  * as a estimate for the number of nodes to be deleted in the next
794  * iteration.
795  */
796 static unsigned int
797 adjust_quantum(unsigned int old, isc_time_t *start) {
798         unsigned int pps = dns_pps;     /* packets per second */
799         unsigned int interval;
800         isc_uint64_t usecs;
801         isc_time_t end;
802         unsigned int new;
803
804         if (pps < 100)
805                 pps = 100;
806         isc_time_now(&end);
807
808         interval = 1000000 / pps;       /* interval in usec */
809         if (interval == 0)
810                 interval = 1;
811         usecs = isc_time_microdiff(&end, start);
812         if (usecs == 0) {
813                 /*
814                  * We were unable to measure the amount of time taken.
815                  * Double the nodes deleted next time.
816                  */
817                 old *= 2;
818                 if (old > 1000)
819                         old = 1000;
820                 return (old);
821         }
822         new = old * interval;
823         new /= (unsigned int)usecs;
824         if (new == 0)
825                 new = 1;
826         else if (new > 1000)
827                 new = 1000;
828
829         /* Smooth */
830         new = (new + old * 3) / 4;
831
832         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
833                       ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
834
835         return (new);
836 }
837
838 static void
839 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
840         unsigned int i;
841         isc_ondestroy_t ondest;
842         isc_result_t result;
843         char buf[DNS_NAME_FORMATSIZE];
844         dns_rbt_t **treep;
845         isc_time_t start;
846
847         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
848                 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
849
850         REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
851         REQUIRE(rbtdb->future_version == NULL);
852
853         if (rbtdb->current_version != NULL) {
854                 unsigned int refs;
855
856                 isc_refcount_decrement(&rbtdb->current_version->references,
857                                        &refs);
858                 INSIST(refs == 0);
859                 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
860                 isc_refcount_destroy(&rbtdb->current_version->references);
861                 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
862                             sizeof(rbtdb_version_t));
863         }
864
865         /*
866          * We assume the number of remaining dead nodes is reasonably small;
867          * the overhead of unlinking all nodes here should be negligible.
868          */
869         for (i = 0; i < rbtdb->node_lock_count; i++) {
870                 dns_rbtnode_t *node;
871
872                 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
873                 while (node != NULL) {
874                         ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
875                         node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
876                 }
877         }
878
879         if (event == NULL)
880                 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
881
882         for (;;) {
883                 /*
884                  * pick the next tree to (start to) destroy
885                  */
886                 treep = &rbtdb->tree;
887                 if (*treep == NULL) {
888                         treep = &rbtdb->nsec;
889                         if (*treep == NULL) {
890                                 treep = &rbtdb->nsec3;
891                                 /*
892                                  * we're finished after clear cutting
893                                  */
894                                 if (*treep == NULL)
895                                         break;
896                         }
897                 }
898
899                 isc_time_now(&start);
900                 result = dns_rbt_destroy2(treep, rbtdb->quantum);
901                 if (result == ISC_R_QUOTA) {
902                         INSIST(rbtdb->task != NULL);
903                         if (rbtdb->quantum != 0)
904                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
905                                                                 &start);
906                         if (event == NULL)
907                                 event = isc_event_allocate(rbtdb->common.mctx,
908                                                            NULL,
909                                                          DNS_EVENT_FREESTORAGE,
910                                                            free_rbtdb_callback,
911                                                            rbtdb,
912                                                            sizeof(isc_event_t));
913                         if (event == NULL)
914                                 continue;
915                         isc_task_send(rbtdb->task, &event);
916                         return;
917                 }
918                 INSIST(result == ISC_R_SUCCESS && *treep == NULL);
919         }
920
921         if (event != NULL)
922                 isc_event_free(&event);
923         if (log) {
924                 if (dns_name_dynamic(&rbtdb->common.origin))
925                         dns_name_format(&rbtdb->common.origin, buf,
926                                         sizeof(buf));
927                 else
928                         strcpy(buf, "<UNKNOWN>");
929                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
930                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
931                               "done free_rbtdb(%s)", buf);
932         }
933         if (dns_name_dynamic(&rbtdb->common.origin))
934                 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
935         for (i = 0; i < rbtdb->node_lock_count; i++) {
936                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
937                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
938         }
939
940         /*
941          * Clean up LRU / re-signing order lists.
942          */
943         if (rbtdb->rdatasets != NULL) {
944                 for (i = 0; i < rbtdb->node_lock_count; i++)
945                         INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
946                 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
947                             rbtdb->node_lock_count *
948                             sizeof(rdatasetheaderlist_t));
949         }
950         /*
951          * Clean up dead node buckets.
952          */
953         if (rbtdb->deadnodes != NULL) {
954                 for (i = 0; i < rbtdb->node_lock_count; i++)
955                         INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
956                 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
957                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
958         }
959         /*
960          * Clean up heap objects.
961          */
962         if (rbtdb->heaps != NULL) {
963                 for (i = 0; i < rbtdb->node_lock_count; i++)
964                         isc_heap_destroy(&rbtdb->heaps[i]);
965                 isc_mem_put(rbtdb->hmctx, rbtdb->heaps,
966                             rbtdb->node_lock_count * sizeof(isc_heap_t *));
967         }
968
969         if (rbtdb->rrsetstats != NULL)
970                 dns_stats_detach(&rbtdb->rrsetstats);
971
972 #ifdef BIND9
973         if (rbtdb->rpz_cidr != NULL)
974                 dns_rpz_cidr_free(&rbtdb->rpz_cidr);
975 #endif
976
977         isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
978                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
979         isc_rwlock_destroy(&rbtdb->tree_lock);
980         isc_refcount_destroy(&rbtdb->references);
981         if (rbtdb->task != NULL)
982                 isc_task_detach(&rbtdb->task);
983
984         RBTDB_DESTROYLOCK(&rbtdb->lock);
985         rbtdb->common.magic = 0;
986         rbtdb->common.impmagic = 0;
987         ondest = rbtdb->common.ondest;
988         isc_mem_detach(&rbtdb->hmctx);
989         isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
990         isc_ondestroy_notify(&ondest, rbtdb);
991 }
992
993 static inline void
994 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
995         isc_boolean_t want_free = ISC_FALSE;
996         unsigned int i;
997         unsigned int inactive = 0;
998
999         /* XXX check for open versions here */
1000
1001         if (rbtdb->soanode != NULL)
1002                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
1003         if (rbtdb->nsnode != NULL)
1004                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
1005
1006         /*
1007          * Even though there are no external direct references, there still
1008          * may be nodes in use.
1009          */
1010         for (i = 0; i < rbtdb->node_lock_count; i++) {
1011                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1012                 rbtdb->node_locks[i].exiting = ISC_TRUE;
1013                 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1014                 if (isc_refcount_current(&rbtdb->node_locks[i].references)
1015                     == 0) {
1016                         inactive++;
1017                 }
1018         }
1019
1020         if (inactive != 0) {
1021                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1022                 rbtdb->active -= inactive;
1023                 if (rbtdb->active == 0)
1024                         want_free = ISC_TRUE;
1025                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1026                 if (want_free) {
1027                         char buf[DNS_NAME_FORMATSIZE];
1028                         if (dns_name_dynamic(&rbtdb->common.origin))
1029                                 dns_name_format(&rbtdb->common.origin, buf,
1030                                                 sizeof(buf));
1031                         else
1032                                 strcpy(buf, "<UNKNOWN>");
1033                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1034                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1035                                       "calling free_rbtdb(%s)", buf);
1036                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
1037                 }
1038         }
1039 }
1040
1041 static void
1042 detach(dns_db_t **dbp) {
1043         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1044         unsigned int refs;
1045
1046         REQUIRE(VALID_RBTDB(rbtdb));
1047
1048         isc_refcount_decrement(&rbtdb->references, &refs);
1049
1050         if (refs == 0)
1051                 maybe_free_rbtdb(rbtdb);
1052
1053         *dbp = NULL;
1054 }
1055
1056 static void
1057 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1058         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1059         rbtdb_version_t *version;
1060         unsigned int refs;
1061
1062         REQUIRE(VALID_RBTDB(rbtdb));
1063
1064         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1065         version = rbtdb->current_version;
1066         isc_refcount_increment(&version->references, &refs);
1067         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1068
1069         *versionp = (dns_dbversion_t *)version;
1070 }
1071
1072 static inline rbtdb_version_t *
1073 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1074                  unsigned int references, isc_boolean_t writer)
1075 {
1076         isc_result_t result;
1077         rbtdb_version_t *version;
1078
1079         version = isc_mem_get(mctx, sizeof(*version));
1080         if (version == NULL)
1081                 return (NULL);
1082         version->serial = serial;
1083         result = isc_refcount_init(&version->references, references);
1084         if (result != ISC_R_SUCCESS) {
1085                 isc_mem_put(mctx, version, sizeof(*version));
1086                 return (NULL);
1087         }
1088         version->writer = writer;
1089         version->commit_ok = ISC_FALSE;
1090         ISC_LIST_INIT(version->changed_list);
1091         ISC_LIST_INIT(version->resigned_list);
1092         ISC_LINK_INIT(version, link);
1093
1094         return (version);
1095 }
1096
1097 static isc_result_t
1098 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1099         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1100         rbtdb_version_t *version;
1101
1102         REQUIRE(VALID_RBTDB(rbtdb));
1103         REQUIRE(versionp != NULL && *versionp == NULL);
1104         REQUIRE(rbtdb->future_version == NULL);
1105
1106         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1107         RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
1108         version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1109                                    ISC_TRUE);
1110         if (version != NULL) {
1111                 version->rbtdb = rbtdb;
1112                 version->commit_ok = ISC_TRUE;
1113                 version->secure = rbtdb->current_version->secure;
1114                 version->havensec3 = rbtdb->current_version->havensec3;
1115                 if (version->havensec3) {
1116                         version->flags = rbtdb->current_version->flags;
1117                         version->iterations =
1118                                 rbtdb->current_version->iterations;
1119                         version->hash = rbtdb->current_version->hash;
1120                         version->salt_length =
1121                                 rbtdb->current_version->salt_length;
1122                         memcpy(version->salt, rbtdb->current_version->salt,
1123                                version->salt_length);
1124                 } else {
1125                         version->flags = 0;
1126                         version->iterations = 0;
1127                         version->hash = 0;
1128                         version->salt_length = 0;
1129                         memset(version->salt, 0, sizeof(version->salt));
1130                 }
1131                 rbtdb->next_serial++;
1132                 rbtdb->future_version = version;
1133         }
1134         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1135
1136         if (version == NULL)
1137                 return (ISC_R_NOMEMORY);
1138
1139         *versionp = version;
1140
1141         return (ISC_R_SUCCESS);
1142 }
1143
1144 static void
1145 attachversion(dns_db_t *db, dns_dbversion_t *source,
1146               dns_dbversion_t **targetp)
1147 {
1148         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1149         rbtdb_version_t *rbtversion = source;
1150         unsigned int refs;
1151
1152         REQUIRE(VALID_RBTDB(rbtdb));
1153         INSIST(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
1154
1155         isc_refcount_increment(&rbtversion->references, &refs);
1156         INSIST(refs > 1);
1157
1158         *targetp = rbtversion;
1159 }
1160
1161 static rbtdb_changed_t *
1162 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1163             dns_rbtnode_t *node)
1164 {
1165         rbtdb_changed_t *changed;
1166         unsigned int refs;
1167
1168         /*
1169          * Caller must be holding the node lock if its reference must be
1170          * protected by the lock.
1171          */
1172
1173         changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1174
1175         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1176
1177         REQUIRE(version->writer);
1178
1179         if (changed != NULL) {
1180                 dns_rbtnode_refincrement(node, &refs);
1181                 INSIST(refs != 0);
1182                 changed->node = node;
1183                 changed->dirty = ISC_FALSE;
1184                 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1185         } else
1186                 version->commit_ok = ISC_FALSE;
1187
1188         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1189
1190         return (changed);
1191 }
1192
1193 static void
1194 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1195                  acachectl_t *array)
1196 {
1197         unsigned int count;
1198         unsigned int i;
1199         unsigned char *raw;     /* RDATASLAB */
1200
1201         /*
1202          * The caller must be holding the corresponding node lock.
1203          */
1204
1205         if (array == NULL)
1206                 return;
1207
1208         raw = (unsigned char *)header + sizeof(*header);
1209         count = raw[0] * 256 + raw[1];
1210
1211         /*
1212          * Sanity check: since an additional cache entry has a reference to
1213          * the original DB node (in the callback arg), there should be no
1214          * acache entries when the node can be freed.
1215          */
1216         for (i = 0; i < count; i++)
1217                 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1218
1219         isc_mem_put(mctx, array, count * sizeof(acachectl_t));
1220 }
1221
1222 static inline void
1223 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1224
1225         if (dns_name_dynamic(&(*noqname)->name))
1226                 dns_name_free(&(*noqname)->name, mctx);
1227         if ((*noqname)->neg != NULL)
1228                 isc_mem_put(mctx, (*noqname)->neg,
1229                             dns_rdataslab_size((*noqname)->neg, 0));
1230         if ((*noqname)->negsig != NULL)
1231                 isc_mem_put(mctx, (*noqname)->negsig,
1232                             dns_rdataslab_size((*noqname)->negsig, 0));
1233         isc_mem_put(mctx, *noqname, sizeof(**noqname));
1234         *noqname = NULL;
1235 }
1236
1237 static inline void
1238 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1239 {
1240         ISC_LINK_INIT(h, link);
1241         h->heap_index = 0;
1242
1243 #if TRACE_HEADER
1244         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1245                 fprintf(stderr, "initialized header: %p\n", h);
1246 #else
1247         UNUSED(rbtdb);
1248 #endif
1249 }
1250
1251 static inline rdatasetheader_t *
1252 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1253 {
1254         rdatasetheader_t *h;
1255
1256         h = isc_mem_get(mctx, sizeof(*h));
1257         if (h == NULL)
1258                 return (NULL);
1259
1260 #if TRACE_HEADER
1261         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1262                 fprintf(stderr, "allocated header: %p\n", h);
1263 #endif
1264         init_rdataset(rbtdb, h);
1265         return (h);
1266 }
1267
1268 static inline void
1269 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1270 {
1271         unsigned int size;
1272         int idx;
1273
1274         if (EXISTS(rdataset) &&
1275             (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1276                 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1277         }
1278
1279         idx = rdataset->node->locknum;
1280         if (ISC_LINK_LINKED(rdataset, link)) {
1281                 INSIST(IS_CACHE(rbtdb));
1282                 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1283         }
1284         if (rdataset->heap_index != 0)
1285                 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1286         rdataset->heap_index = 0;
1287
1288         if (rdataset->noqname != NULL)
1289                 free_noqname(mctx, &rdataset->noqname);
1290         if (rdataset->closest != NULL)
1291                 free_noqname(mctx, &rdataset->closest);
1292
1293         free_acachearray(mctx, rdataset, rdataset->additional_auth);
1294         free_acachearray(mctx, rdataset, rdataset->additional_glue);
1295
1296         if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1297                 size = sizeof(*rdataset);
1298         else
1299                 size = dns_rdataslab_size((unsigned char *)rdataset,
1300                                           sizeof(*rdataset));
1301         isc_mem_put(mctx, rdataset, size);
1302 }
1303
1304 static inline void
1305 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1306         rdatasetheader_t *header, *dcurrent;
1307         isc_boolean_t make_dirty = ISC_FALSE;
1308
1309         /*
1310          * Caller must hold the node lock.
1311          */
1312
1313         /*
1314          * We set the IGNORE attribute on rdatasets with serial number
1315          * 'serial'.  When the reference count goes to zero, these rdatasets
1316          * will be cleaned up; until that time, they will be ignored.
1317          */
1318         for (header = node->data; header != NULL; header = header->next) {
1319                 if (header->serial == serial) {
1320                         header->attributes |= RDATASET_ATTR_IGNORE;
1321                         make_dirty = ISC_TRUE;
1322                 }
1323                 for (dcurrent = header->down;
1324                      dcurrent != NULL;
1325                      dcurrent = dcurrent->down) {
1326                         if (dcurrent->serial == serial) {
1327                                 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1328                                 make_dirty = ISC_TRUE;
1329                         }
1330                 }
1331         }
1332         if (make_dirty)
1333                 node->dirty = 1;
1334 }
1335
1336 static inline void
1337 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1338 {
1339         rdatasetheader_t *d, *down_next;
1340
1341         for (d = top->down; d != NULL; d = down_next) {
1342                 down_next = d->down;
1343                 free_rdataset(rbtdb, mctx, d);
1344         }
1345         top->down = NULL;
1346 }
1347
1348 static inline void
1349 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1350         rdatasetheader_t *current, *top_prev, *top_next;
1351         isc_mem_t *mctx = rbtdb->common.mctx;
1352
1353         /*
1354          * Caller must be holding the node lock.
1355          */
1356
1357         top_prev = NULL;
1358         for (current = node->data; current != NULL; current = top_next) {
1359                 top_next = current->next;
1360                 clean_stale_headers(rbtdb, mctx, current);
1361                 /*
1362                  * If current is nonexistent or stale, we can clean it up.
1363                  */
1364                 if ((current->attributes &
1365                      (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1366                         if (top_prev != NULL)
1367                                 top_prev->next = current->next;
1368                         else
1369                                 node->data = current->next;
1370                         free_rdataset(rbtdb, mctx, current);
1371                 } else
1372                         top_prev = current;
1373         }
1374         node->dirty = 0;
1375 }
1376
1377 static inline void
1378 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1379                 rbtdb_serial_t least_serial)
1380 {
1381         rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1382         rdatasetheader_t *top_prev, *top_next;
1383         isc_mem_t *mctx = rbtdb->common.mctx;
1384         isc_boolean_t still_dirty = ISC_FALSE;
1385
1386         /*
1387          * Caller must be holding the node lock.
1388          */
1389         REQUIRE(least_serial != 0);
1390
1391         top_prev = NULL;
1392         for (current = node->data; current != NULL; current = top_next) {
1393                 top_next = current->next;
1394
1395                 /*
1396                  * First, we clean up any instances of multiple rdatasets
1397                  * with the same serial number, or that have the IGNORE
1398                  * attribute.
1399                  */
1400                 dparent = current;
1401                 for (dcurrent = current->down;
1402                      dcurrent != NULL;
1403                      dcurrent = down_next) {
1404                         down_next = dcurrent->down;
1405                         INSIST(dcurrent->serial <= dparent->serial);
1406                         if (dcurrent->serial == dparent->serial ||
1407                             IGNORE(dcurrent)) {
1408                                 if (down_next != NULL)
1409                                         down_next->next = dparent;
1410                                 dparent->down = down_next;
1411                                 free_rdataset(rbtdb, mctx, dcurrent);
1412                         } else
1413                                 dparent = dcurrent;
1414                 }
1415
1416                 /*
1417                  * We've now eliminated all IGNORE datasets with the possible
1418                  * exception of current, which we now check.
1419                  */
1420                 if (IGNORE(current)) {
1421                         down_next = current->down;
1422                         if (down_next == NULL) {
1423                                 if (top_prev != NULL)
1424                                         top_prev->next = current->next;
1425                                 else
1426                                         node->data = current->next;
1427                                 free_rdataset(rbtdb, mctx, current);
1428                                 /*
1429                                  * current no longer exists, so we can
1430                                  * just continue with the loop.
1431                                  */
1432                                 continue;
1433                         } else {
1434                                 /*
1435                                  * Pull up current->down, making it the new
1436                                  * current.
1437                                  */
1438                                 if (top_prev != NULL)
1439                                         top_prev->next = down_next;
1440                                 else
1441                                         node->data = down_next;
1442                                 down_next->next = top_next;
1443                                 free_rdataset(rbtdb, mctx, current);
1444                                 current = down_next;
1445                         }
1446                 }
1447
1448                 /*
1449                  * We now try to find the first down node less than the
1450                  * least serial.
1451                  */
1452                 dparent = current;
1453                 for (dcurrent = current->down;
1454                      dcurrent != NULL;
1455                      dcurrent = down_next) {
1456                         down_next = dcurrent->down;
1457                         if (dcurrent->serial < least_serial)
1458                                 break;
1459                         dparent = dcurrent;
1460                 }
1461
1462                 /*
1463                  * If there is a such an rdataset, delete it and any older
1464                  * versions.
1465                  */
1466                 if (dcurrent != NULL) {
1467                         do {
1468                                 down_next = dcurrent->down;
1469                                 INSIST(dcurrent->serial <= least_serial);
1470                                 free_rdataset(rbtdb, mctx, dcurrent);
1471                                 dcurrent = down_next;
1472                         } while (dcurrent != NULL);
1473                         dparent->down = NULL;
1474                 }
1475
1476                 /*
1477                  * Note.  The serial number of 'current' might be less than
1478                  * least_serial too, but we cannot delete it because it is
1479                  * the most recent version, unless it is a NONEXISTENT
1480                  * rdataset.
1481                  */
1482                 if (current->down != NULL) {
1483                         still_dirty = ISC_TRUE;
1484                         top_prev = current;
1485                 } else {
1486                         /*
1487                          * If this is a NONEXISTENT rdataset, we can delete it.
1488                          */
1489                         if (NONEXISTENT(current)) {
1490                                 if (top_prev != NULL)
1491                                         top_prev->next = current->next;
1492                                 else
1493                                         node->data = current->next;
1494                                 free_rdataset(rbtdb, mctx, current);
1495                         } else
1496                                 top_prev = current;
1497                 }
1498         }
1499         if (!still_dirty)
1500                 node->dirty = 0;
1501 }
1502
1503 static void
1504 delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node)
1505 {
1506         dns_rbtnode_t *nsecnode;
1507         dns_fixedname_t fname;
1508         dns_name_t *name;
1509         isc_result_t result = ISC_R_UNEXPECTED;
1510
1511         INSIST(!ISC_LINK_LINKED(node, deadlink));
1512
1513         switch (node->nsec) {
1514         case DNS_RBT_NSEC_NORMAL:
1515 #ifdef BIND9
1516                 if (rbtdb->rpz_cidr != NULL) {
1517                         dns_fixedname_init(&fname);
1518                         name = dns_fixedname_name(&fname);
1519                         dns_rbt_fullnamefromnode(node, name);
1520                         dns_rpz_cidr_deleteip(rbtdb->rpz_cidr, name);
1521                 }
1522 #endif
1523                 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1524                 break;
1525         case DNS_RBT_NSEC_HAS_NSEC:
1526                 dns_fixedname_init(&fname);
1527                 name = dns_fixedname_name(&fname);
1528                 dns_rbt_fullnamefromnode(node, name);
1529                 /*
1530                  * Delete the corresponding node from the auxiliary NSEC
1531                  * tree before deleting from the main tree.
1532                  */
1533                 nsecnode = NULL;
1534                 result = dns_rbt_findnode(rbtdb->nsec, name, NULL, &nsecnode,
1535                                           NULL, DNS_RBTFIND_EMPTYDATA,
1536                                           NULL, NULL);
1537                 if (result != ISC_R_SUCCESS) {
1538                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1539                                       DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1540                                       "delete_node: "
1541                                       "dns_rbt_findnode(nsec): %s",
1542                                       isc_result_totext(result));
1543                 } else {
1544                         result = dns_rbt_deletenode(rbtdb->nsec, nsecnode,
1545                                                     ISC_FALSE);
1546                         if (result != ISC_R_SUCCESS) {
1547                                 isc_log_write(dns_lctx,
1548                                               DNS_LOGCATEGORY_DATABASE,
1549                                               DNS_LOGMODULE_CACHE,
1550                                               ISC_LOG_WARNING,
1551                                               "delete_nsecnode(): "
1552                                               "dns_rbt_deletenode(nsecnode): %s",
1553                                               isc_result_totext(result));
1554                         }
1555                 }
1556                 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1557 #ifdef BIND9
1558                 dns_rpz_cidr_deleteip(rbtdb->rpz_cidr, name);
1559 #endif
1560                 break;
1561         case DNS_RBT_NSEC_NSEC:
1562                 result = dns_rbt_deletenode(rbtdb->nsec, node, ISC_FALSE);
1563                 break;
1564         case DNS_RBT_NSEC_NSEC3:
1565                 result = dns_rbt_deletenode(rbtdb->nsec3, node, ISC_FALSE);
1566                 break;
1567         }
1568         if (result != ISC_R_SUCCESS) {
1569                 isc_log_write(dns_lctx,
1570                               DNS_LOGCATEGORY_DATABASE,
1571                               DNS_LOGMODULE_CACHE,
1572                               ISC_LOG_WARNING,
1573                               "delete_nsecnode(): "
1574                               "dns_rbt_deletenode: %s",
1575                               isc_result_totext(result));
1576         }
1577 }
1578
1579 /*%
1580  * Clean up dead nodes.  These are nodes which have no references, and
1581  * have no data.  They are dead but we could not or chose not to delete
1582  * them when we deleted all the data at that node because we did not want
1583  * to wait for the tree write lock.
1584  *
1585  * The caller must hold a tree write lock and bucketnum'th node (write) lock.
1586  */
1587 static void
1588 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1589         dns_rbtnode_t *node;
1590         int count = 10;         /* XXXJT: should be adjustable */
1591
1592         node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1593         while (node != NULL && count > 0) {
1594                 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1595
1596                 /*
1597                  * Since we're holding a tree write lock, it should be
1598                  * impossible for this node to be referenced by others.
1599                  */
1600                 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1601                        node->data == NULL);
1602
1603                 delete_node(rbtdb, node);
1604
1605                 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1606                 count--;
1607         }
1608 }
1609
1610 /*
1611  * Caller must be holding the node lock.
1612  */
1613 static inline void
1614 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1615         unsigned int lockrefs, noderefs;
1616         isc_refcount_t *lockref;
1617
1618         INSIST(!ISC_LINK_LINKED(node, deadlink));
1619         dns_rbtnode_refincrement0(node, &noderefs);
1620         if (noderefs == 1) {    /* this is the first reference to the node */
1621                 lockref = &rbtdb->node_locks[node->locknum].references;
1622                 isc_refcount_increment0(lockref, &lockrefs);
1623                 INSIST(lockrefs != 0);
1624         }
1625         INSIST(noderefs != 0);
1626 }
1627
/*
 * This function is assumed to be called when a node is newly referenced
 * and can be in the deadnode list.  In that case the node must be retrieved
 * from the list because it is going to be used.  In addition, if the caller
 * happens to hold a write lock on the tree, it's a good chance to purge dead
 * nodes.
 * Note: while a new reference is gained in multiple places, there are only very
 * few cases where the node can be in the deadnode list (only empty nodes can
 * have been added to the list).
 *
 * 'treelocktype' is the type of tree lock the caller currently holds; dead
 * nodes of the node's lock bucket are purged only when it is
 * isc_rwlocktype_write.  The node lock itself is acquired and released
 * inside this function.
 */
static inline void
reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
                isc_rwlocktype_t treelocktype)
{
        isc_rwlocktype_t locktype = isc_rwlocktype_read;
        nodelock_t *nodelock = &rbtdb->node_locks[node->locknum].lock;
        isc_boolean_t maybe_cleanup = ISC_FALSE;

        POST(locktype);

        /* Start with the cheaper read lock; it is upgraded only if needed. */
        NODE_STRONGLOCK(nodelock);
        NODE_WEAKLOCK(nodelock, locktype);

        /*
         * Check if we can possibly cleanup the dead node.  If so, upgrade
         * the node lock below to perform the cleanup.
         */
        if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
            treelocktype == isc_rwlocktype_write) {
                maybe_cleanup = ISC_TRUE;
        }

        if (ISC_LINK_LINKED(node, deadlink) || maybe_cleanup) {
                /*
                 * Upgrade the lock and test if we still need to unlink.
                 * The weak lock is dropped and re-taken, so the list
                 * membership has to be checked again under the write lock.
                 */
                NODE_WEAKUNLOCK(nodelock, locktype);
                locktype = isc_rwlocktype_write;
                POST(locktype);
                NODE_WEAKLOCK(nodelock, locktype);
                if (ISC_LINK_LINKED(node, deadlink))
                        ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
                                        node, deadlink);
                if (maybe_cleanup)
                        cleanup_dead_nodes(rbtdb, node->locknum);
        }

        /* The node is off the dead list by now, as new_reference() insists. */
        new_reference(rbtdb, node);

        /* 'locktype' records whichever weak lock type is currently held. */
        NODE_WEAKUNLOCK(nodelock, locktype);
        NODE_STRONGUNLOCK(nodelock);
}
1680
/*
 * Release one reference to 'node' and, when it was the last one, clean the
 * node up and either delete it from the tree, schedule it for pruning, or
 * queue it on the dead-node list of its lock bucket.
 *
 * Caller must be holding the node lock; either the "strong", read or write
 * lock.  Note that the lock must be held even when node references are
 * atomically modified; in that case the decrement operation itself does not
 * have to be protected, but we must avoid a race condition where multiple
 * threads are decreasing the reference to zero simultaneously and at least
 * one of them is going to free the node.
 *
 * 'least_serial' is the least serial in use, or 0 if the caller doesn't
 * know it; in the latter case it is read from 'rbtdb' when needed (dirty
 * node of a non-cache database).
 *
 * 'nlock' is the type of node lock the caller holds.  A read lock is
 * temporarily upgraded to a write lock and downgraded again before return.
 *
 * 'tlock' is the type of tree lock the caller holds (none, read or write).
 * Unless it already is a write lock, a non-blocking attempt is made to
 * obtain one; the caller's original tree lock state is restored before
 * return.
 *
 * 'pruning' is true when the call originates from prune_tree(); it
 * prevents dispatching yet another pruning event for the node.
 *
 * This function returns ISC_TRUE if and only if the node reference decreases
 * to zero.  Note that when a pruning event is dispatched for the node, a
 * new reference is taken on behalf of that event and ISC_FALSE is returned.
 */
static isc_boolean_t
decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
                    rbtdb_serial_t least_serial,
                    isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
                    isc_boolean_t pruning)
{
        isc_result_t result;
        isc_boolean_t write_locked;
        rbtdb_nodelock_t *nodelock;
        unsigned int refs, nrefs;
        int bucket = node->locknum;
        isc_boolean_t no_reference = ISC_TRUE;

        nodelock = &rbtdb->node_locks[bucket];

        /*
         * Handle easy and typical case first: the node is clean and still
         * has data or a subtree, so it is never a candidate for deletion
         * and only the counters need updating.
         */
        if (!node->dirty && (node->data != NULL || node->down != NULL)) {
                dns_rbtnode_refdecrement(node, &nrefs);
                INSIST((int)nrefs >= 0);
                if (nrefs == 0) {
                        isc_refcount_decrement(&nodelock->references, &refs);
                        INSIST((int)refs >= 0);
                }
                return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
        }

        /* Upgrade the lock? */
        if (nlock == isc_rwlocktype_read) {
                NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
                NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
        }

        dns_rbtnode_refdecrement(node, &nrefs);
        INSIST((int)nrefs >= 0);
        if (nrefs > 0) {
                /* Restore the lock? */
                if (nlock == isc_rwlocktype_read)
                        NODE_WEAKDOWNGRADE(&nodelock->lock);
                return (ISC_FALSE);
        }

        /* Last reference is gone: clean up a dirty node first. */
        if (node->dirty) {
                if (IS_CACHE(rbtdb))
                        clean_cache_node(rbtdb, node);
                else {
                        if (least_serial == 0) {
                                /*
                                 * Caller doesn't know the least serial.
                                 * Get it.
                                 */
                                RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
                                least_serial = rbtdb->least_serial;
                                RBTDB_UNLOCK(&rbtdb->lock,
                                             isc_rwlocktype_read);
                        }
                        clean_zone_node(rbtdb, node, least_serial);
                }
        }

        /*
         * Attempt to switch to a write lock on the tree.  If this fails,
         * we will add this node to a linked list of nodes in this locking
         * bucket which we will free later.
         */
        if (tlock != isc_rwlocktype_write) {
                /*
                 * Locking hierarchy notwithstanding, we don't need to free
                 * the node lock before acquiring the tree write lock because
                 * we only do a trylock.
                 */
                if (tlock == isc_rwlocktype_read)
                        result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
                else
                        result = isc_rwlock_trylock(&rbtdb->tree_lock,
                                                    isc_rwlocktype_write);
                RUNTIME_CHECK(result == ISC_R_SUCCESS ||
                              result == ISC_R_LOCKBUSY);

                write_locked = ISC_TF(result == ISC_R_SUCCESS);
        } else
                write_locked = ISC_TRUE;

        /* The node no longer counts as a reference on its lock bucket. */
        isc_refcount_decrement(&nodelock->references, &refs);
        INSIST((int)refs >= 0);

        /*
         * XXXDCL should this only be done for cache zones?
         *
         * A node that still has data or a subtree (after the cleaning
         * above) stays in the tree.
         */
        if (node->data != NULL || node->down != NULL)
                goto restore_locks;

        if (write_locked) {
                /*
                 * We can now delete the node.
                 */

                /*
                 * If this node is the only one in the level it's in, deleting
                 * this node may recursively make its parent the only node in
                 * the parent level; if so, and if no one is currently using
                 * the parent node, this is almost the only opportunity to
                 * clean it up.  But the recursive cleanup is not that trivial
                 * since the child and parent may be in different lock buckets,
                 * which would cause a lock order reversal problem.  To avoid
                 * the trouble, we'll dispatch a separate event for batch
                 * cleaning.  We need to check whether we're deleting the node
                 * as a result of pruning to avoid infinite dispatching.
                 * Note: pruning happens only when a task has been set for the
                 * rbtdb.  If the user of the rbtdb chooses not to set a task,
                 * it's their responsibility to purge stale leaves (e.g. by
                 * periodic walk-through).
                 */
                if (!pruning && node->parent != NULL &&
                    node->parent->down == node && node->left == NULL &&
                    node->right == NULL && rbtdb->task != NULL) {
                        isc_event_t *ev;
                        dns_db_t *db;

                        ev = isc_event_allocate(rbtdb->common.mctx, NULL,
                                                DNS_EVENT_RBTPRUNE,
                                                prune_tree, node,
                                                sizeof(isc_event_t));
                        if (ev != NULL) {
                                /*
                                 * The event holds a reference to both the
                                 * node and the database; prune_tree()
                                 * releases them.
                                 */
                                new_reference(rbtdb, node);
                                db = NULL;
                                attach((dns_db_t *)rbtdb, &db);
                                ev->ev_sender = db;
                                isc_task_send(rbtdb->task, &ev);
                                no_reference = ISC_FALSE;
                        } else {
                                /*
                                 * XXX: this is a weird situation.  We could
                                 * ignore this error case, but then the stale
                                 * node will unlikely be purged except via a
                                 * rare condition such as manual cleanup.  So
                                 * we queue it in the deadnodes list, hoping
                                 * the memory shortage is temporary and the node
                                 * will be deleted later.
                                 */
                                isc_log_write(dns_lctx,
                                              DNS_LOGCATEGORY_DATABASE,
                                              DNS_LOGMODULE_CACHE,
                                              ISC_LOG_INFO,
                                              "decrement_reference: failed to "
                                              "allocate pruning event");
                                INSIST(node->data == NULL);
                                INSIST(!ISC_LINK_LINKED(node, deadlink));
                                ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
                                                deadlink);
                        }
                } else {
                        if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
                                char printname[DNS_NAME_FORMATSIZE];

                                isc_log_write(dns_lctx,
                                              DNS_LOGCATEGORY_DATABASE,
                                              DNS_LOGMODULE_CACHE,
                                              ISC_LOG_DEBUG(1),
                                              "decrement_reference: "
                                              "delete from rbt: %p %s",
                                              node,
                                              dns_rbt_formatnodename(node,
                                                        printname,
                                                        sizeof(printname)));
                        }

                        delete_node(rbtdb, node);
                }
        } else {
                /*
                 * No tree write lock: defer the deletion by queueing the
                 * node on this bucket's dead-node list.
                 */
                INSIST(node->data == NULL);
                INSIST(!ISC_LINK_LINKED(node, deadlink));
                ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
        }

 restore_locks:
        /* Restore the lock? */
        if (nlock == isc_rwlocktype_read)
                NODE_WEAKDOWNGRADE(&nodelock->lock);

        /*
         * Relock a read lock, or unlock the write lock if no lock was held.
         */
        if (tlock == isc_rwlocktype_none)
                if (write_locked)
                        RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);

        if (tlock == isc_rwlocktype_read)
                if (write_locked)
                        isc_rwlock_downgrade(&rbtdb->tree_lock);

        return (no_reference);
}
1883
/*
 * Prune the tree by recursively cleaning up single leaves.  In the worst
 * case, the number of iterations is the number of tree levels, which is at
 * most the maximum number of domain name labels, i.e., 127.  In practice, this
 * should be much smaller (only a few times), and even the worst case would be
 * acceptable for a single event.
 *
 * This is the action of the DNS_EVENT_RBTPRUNE event dispatched by
 * decrement_reference(): 'event->ev_arg' is the node to start from and
 * 'event->ev_sender' is the database, both of which were referenced when
 * the event was sent.  'task' is unused.
 */
static void
prune_tree(isc_task_t *task, isc_event_t *event) {
        dns_rbtdb_t *rbtdb = event->ev_sender;
        dns_rbtnode_t *node = event->ev_arg;
        dns_rbtnode_t *parent;
        unsigned int locknum;

        UNUSED(task);

        /* Everything needed from the event has been copied out above. */
        isc_event_free(&event);

        RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
        locknum = node->locknum;
        NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
        do {
                /*
                 * Remember the parent first: 'node' may be deleted by the
                 * decrement below.
                 */
                parent = node->parent;
                decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
                                    isc_rwlocktype_write, ISC_TRUE);

                if (parent != NULL && parent->down == NULL) {
                        /*
                         * node was the only down child of the parent and has
                         * just been removed.  We'll then need to examine the
                         * parent.  Keep the lock if possible; otherwise,
                         * release the old lock and acquire one for the parent.
                         */
                        if (parent->locknum != locknum) {
                                NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
                                            isc_rwlocktype_write);
                                locknum = parent->locknum;
                                NODE_LOCK(&rbtdb->node_locks[locknum].lock,
                                          isc_rwlocktype_write);
                        }

                        /*
                         * We need to gain a reference to the node before
                         * decrementing it in the next iteration.  In addition,
                         * if the node is in the dead-nodes list, extract it
                         * from the list beforehand as we do in
                         * reactivate_node().
                         */
                        if (ISC_LINK_LINKED(parent, deadlink))
                                ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
                                                parent, deadlink);
                        new_reference(rbtdb, parent);
                } else
                        parent = NULL;

                node = parent;
        } while (node != NULL);
        /* 'locknum' identifies whichever bucket lock is held at this point. */
        NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
        RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);

        /* Drop the database reference taken when the event was dispatched. */
        detach((dns_db_t **)&rbtdb);
}
1946
1947 static inline void
1948 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1949                    rbtdb_changedlist_t *cleanup_list)
1950 {
1951         /*
1952          * Caller must be holding the database lock.
1953          */
1954
1955         rbtdb->least_serial = version->serial;
1956         *cleanup_list = version->changed_list;
1957         ISC_LIST_INIT(version->changed_list);
1958 }
1959
1960 static inline void
1961 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1962         rbtdb_changed_t *changed, *next_changed;
1963
1964         /*
1965          * If the changed record is dirty, then
1966          * an update created multiple versions of
1967          * a given rdataset.  We keep this list
1968          * until we're the least open version, at
1969          * which point it's safe to get rid of any
1970          * older versions.
1971          *
1972          * If the changed record isn't dirty, then
1973          * we don't need it anymore since we're
1974          * committing and not rolling back.
1975          *
1976          * The caller must be holding the database lock.
1977          */
1978         for (changed = HEAD(version->changed_list);
1979              changed != NULL;
1980              changed = next_changed) {
1981                 next_changed = NEXT(changed, link);
1982                 if (!changed->dirty) {
1983                         UNLINK(version->changed_list,
1984                                changed, link);
1985                         APPEND(*cleanup_list,
1986                                changed, link);
1987                 }
1988         }
1989 }
1990
1991 static void
1992 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1993 #ifndef BIND9
1994         UNUSED(db);
1995         UNUSED(version);
1996         UNUSED(origin);
1997
1998         return;
1999 #else
2000         dns_rdataset_t keyset;
2001         dns_rdataset_t nsecset, signsecset;
2002         isc_boolean_t haszonekey = ISC_FALSE;
2003         isc_boolean_t hasnsec = ISC_FALSE;
2004         isc_result_t result;
2005
2006         dns_rdataset_init(&keyset);
2007         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
2008                                      0, 0, &keyset, NULL);
2009         if (result == ISC_R_SUCCESS) {
2010                 result = dns_rdataset_first(&keyset);
2011                 while (result == ISC_R_SUCCESS) {
2012                         dns_rdata_t keyrdata = DNS_RDATA_INIT;
2013                         dns_rdataset_current(&keyset, &keyrdata);
2014                         if (dns_zonekey_iszonekey(&keyrdata)) {
2015                                 haszonekey = ISC_TRUE;
2016                                 break;
2017                         }
2018                         result = dns_rdataset_next(&keyset);
2019                 }
2020                 dns_rdataset_disassociate(&keyset);
2021         }
2022         if (!haszonekey) {
2023                 version->secure = dns_db_insecure;
2024                 version->havensec3 = ISC_FALSE;
2025                 return;
2026         }
2027
2028         dns_rdataset_init(&nsecset);
2029         dns_rdataset_init(&signsecset);
2030         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
2031                                      0, 0, &nsecset, &signsecset);
2032         if (result == ISC_R_SUCCESS) {
2033                 if (dns_rdataset_isassociated(&signsecset)) {
2034                         hasnsec = ISC_TRUE;
2035                         dns_rdataset_disassociate(&signsecset);
2036                 }
2037                 dns_rdataset_disassociate(&nsecset);
2038         }
2039
2040         setnsec3parameters(db, version);
2041
2042         /*
2043          * Do we have a valid NSEC/NSEC3 chain?
2044          */
2045         if (version->havensec3 || hasnsec)
2046                 version->secure = dns_db_secure;
2047         else
2048                 version->secure = dns_db_insecure;
2049 #endif
2050 }
2051
/*%
 * Walk the origin node looking for NSEC3PARAM records.
 * Cache the nsec3 parameters.
 *
 * On return version->havensec3 tells whether a usable NSEC3PARAM record
 * (zero flags and a supported or "unknown test" hash algorithm) was found
 * in the rdataset visible to 'version'; if so, its hash, salt, iterations
 * and flags have been copied into 'version'.  A record with a real
 * (non-test) algorithm is preferred and ends the search.
 *
 * The tree lock and the origin node's lock are taken (for reading) and
 * released inside this function.
 */
#ifdef BIND9
static void
setnsec3parameters(dns_db_t *db, rbtdb_version_t *version) {
        dns_rbtnode_t *node;
        dns_rdata_nsec3param_t nsec3param;
        dns_rdata_t rdata = DNS_RDATA_INIT;
        isc_region_t region;
        isc_result_t result;
        rdatasetheader_t *header, *header_next;
        unsigned char *raw;             /* RDATASLAB */
        unsigned int count, length;
        dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;

        RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
        version->havensec3 = ISC_FALSE;
        node = rbtdb->origin_node;
        NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
                  isc_rwlocktype_read);
        for (header = node->data;
             header != NULL;
             header = header_next) {
                /*
                 * Save the sibling now; 'header' is reused below to walk
                 * down the version chain and may end up NULL.
                 */
                header_next = header->next;
                /*
                 * Walk down to the first header that is visible in this
                 * version (serial not newer than ours and not ignored).
                 * A NONEXISTENT header means the rdataset does not exist
                 * in this version.
                 */
                do {
                        if (header->serial <= version->serial &&
                            !IGNORE(header)) {
                                if (NONEXISTENT(header))
                                        header = NULL;
                                break;
                        } else
                                header = header->down;
                } while (header != NULL);

                if (header != NULL &&
                    (header->type == dns_rdatatype_nsec3param)) {
                        /*
                         * Find A NSEC3PARAM with a supported algorithm.
                         *
                         * The raw rdata slab follows the header: a two
                         * byte big-endian record count, then the records.
                         * NOTE(review): with DNS_RDATASET_FIXED the extra
                         * bytes skipped here are presumably a per-record
                         * offset table -- confirm against rdataslab.c.
                         */
                        raw = (unsigned char *)header + sizeof(*header);
                        count = raw[0] * 256 + raw[1]; /* count */
#if DNS_RDATASET_FIXED
                        raw += count * 4 + 2;
#else
                        raw += 2;
#endif
                        while (count-- > 0U) {
                                /*
                                 * Each record starts with a two byte
                                 * big-endian length (followed by two more
                                 * bytes with DNS_RDATASET_FIXED).
                                 */
                                length = raw[0] * 256 + raw[1];
#if DNS_RDATASET_FIXED
                                raw += 4;
#else
                                raw += 2;
#endif
                                region.base = raw;
                                region.length = length;
                                /* Advance to the next record right away. */
                                raw += length;
                                dns_rdata_fromregion(&rdata,
                                                     rbtdb->common.rdclass,
                                                     dns_rdatatype_nsec3param,
                                                     &region);
                                result = dns_rdata_tostruct(&rdata,
                                                            &nsec3param,
                                                            NULL);
                                INSIST(result == ISC_R_SUCCESS);
                                /* 'rdata' is reused for the next record. */
                                dns_rdata_reset(&rdata);

                                /* Skip hash algorithms we cannot use. */
                                if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
                                    !dns_nsec3_supportedhash(nsec3param.hash))
                                        continue;

                                /* Only records with no flags set qualify. */
                                if (nsec3param.flags != 0)
                                        continue;

                                memcpy(version->salt, nsec3param.salt,
                                       nsec3param.salt_length);
                                version->hash = nsec3param.hash;
                                version->salt_length = nsec3param.salt_length;
                                version->iterations = nsec3param.iterations;
                                version->flags = nsec3param.flags;
                                version->havensec3 = ISC_TRUE;
                                /*
                                 * Look for a better algorithm than the
                                 * unknown test algorithm.
                                 */
                                if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
                                        goto unlock;
                        }
                }
        }
 unlock:
        NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
                    isc_rwlocktype_read);
        RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
}
#endif
2149
2150 static void
2151 cleanup_dead_nodes_callback(isc_task_t *task, isc_event_t *event) {
2152         dns_rbtdb_t *rbtdb = event->ev_arg;
2153         isc_boolean_t again = ISC_FALSE;
2154         unsigned int locknum;
2155         unsigned int refs;
2156
2157         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2158         for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) {
2159                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
2160                           isc_rwlocktype_write);
2161                 cleanup_dead_nodes(rbtdb, locknum);
2162                 if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL)
2163                         again = ISC_TRUE;
2164                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
2165                             isc_rwlocktype_write);
2166         }
2167         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2168         if (again)
2169                 isc_task_send(task, &event);
2170         else {
2171                 isc_event_free(&event);
2172                 isc_refcount_decrement(&rbtdb->references, &refs);
2173                 if (refs == 0)
2174                         maybe_free_rbtdb(rbtdb);
2175         }
2176 }
2177
2178 static void
2179 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2180         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2181         rbtdb_version_t *version, *cleanup_version, *least_greater;
2182         isc_boolean_t rollback = ISC_FALSE;
2183         rbtdb_changedlist_t cleanup_list;
2184         rdatasetheaderlist_t resigned_list;
2185         rbtdb_changed_t *changed, *next_changed;
2186         rbtdb_serial_t serial, least_serial;
2187         dns_rbtnode_t *rbtnode;
2188         unsigned int refs;
2189         rdatasetheader_t *header;
2190         isc_boolean_t writer;
2191
2192         REQUIRE(VALID_RBTDB(rbtdb));
2193         version = (rbtdb_version_t *)*versionp;
2194         INSIST(version->rbtdb == rbtdb);
2195
2196         cleanup_version = NULL;
2197         ISC_LIST_INIT(cleanup_list);
2198         ISC_LIST_INIT(resigned_list);
2199
2200         isc_refcount_decrement(&version->references, &refs);
2201         if (refs > 0) {         /* typical and easy case first */
2202                 if (commit) {
2203                         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2204                         INSIST(!version->writer);
2205                         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2206                 }
2207                 goto end;
2208         }
2209
2210         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2211         serial = version->serial;
2212         writer = version->writer;
2213         if (version->writer) {
2214                 if (commit) {
2215                         unsigned cur_ref;
2216                         rbtdb_version_t *cur_version;
2217
2218                         INSIST(version->commit_ok);
2219                         INSIST(version == rbtdb->future_version);
2220                         /*
2221                          * The current version is going to be replaced.
2222                          * Release the (likely last) reference to it from the
2223                          * DB itself and unlink it from the open list.
2224                          */
2225                         cur_version = rbtdb->current_version;
2226                         isc_refcount_decrement(&cur_version->references,
2227                                                &cur_ref);
2228                         if (cur_ref == 0) {
2229                                 if (cur_version->serial == rbtdb->least_serial)
2230                                         INSIST(EMPTY(cur_version->changed_list));
2231                                 UNLINK(rbtdb->open_versions,
2232                                        cur_version, link);
2233                         }
2234                         if (EMPTY(rbtdb->open_versions)) {
2235                                 /*
2236                                  * We're going to become the least open
2237                                  * version.
2238                                  */
2239                                 make_least_version(rbtdb, version,
2240                                                    &cleanup_list);
2241                         } else {
2242                                 /*
2243                                  * Some other open version is the
2244                                  * least version.  We can't cleanup
2245                                  * records that were changed in this
2246                                  * version because the older versions
2247                                  * may still be in use by an open
2248                                  * version.
2249                                  *
2250                                  * We can, however, discard the
2251                                  * changed records for things that
2252                                  * we've added that didn't exist in
2253                                  * prior versions.
2254                                  */
2255                                 cleanup_nondirty(version, &cleanup_list);
2256                         }
2257                         /*
2258                          * If the (soon to be former) current version
2259                          * isn't being used by anyone, we can clean
2260                          * it up.
2261                          */
2262                         if (cur_ref == 0) {
2263                                 cleanup_version = cur_version;
2264                                 APPENDLIST(version->changed_list,
2265                                            cleanup_version->changed_list,
2266                                            link);
2267                         }
2268                         /*
2269                          * Become the current version.
2270                          */
2271                         version->writer = ISC_FALSE;
2272                         rbtdb->current_version = version;
2273                         rbtdb->current_serial = version->serial;
2274                         rbtdb->future_version = NULL;
2275
2276                         /*
2277                          * Keep the current version in the open list, and
2278                          * gain a reference for the DB itself (see the DB
2279                          * creation function below).  This must be the only
2280                          * case where we need to increment the counter from
2281                          * zero and need to use isc_refcount_increment0().
2282                          */
2283                         isc_refcount_increment0(&version->references,
2284                                                 &cur_ref);
2285                         INSIST(cur_ref == 1);
2286                         PREPEND(rbtdb->open_versions,
2287                                 rbtdb->current_version, link);
2288                         resigned_list = version->resigned_list;
2289                         ISC_LIST_INIT(version->resigned_list);
2290                 } else {
2291                         /*
2292                          * We're rolling back this transaction.
2293                          */
2294                         cleanup_list = version->changed_list;
2295                         ISC_LIST_INIT(version->changed_list);
2296                         resigned_list = version->resigned_list;
2297                         ISC_LIST_INIT(version->resigned_list);
2298                         rollback = ISC_TRUE;
2299                         cleanup_version = version;
2300                         rbtdb->future_version = NULL;
2301                 }
2302         } else {
2303                 if (version != rbtdb->current_version) {
2304                         /*
2305                          * There are no external or internal references
2306                          * to this version and it can be cleaned up.
2307                          */
2308                         cleanup_version = version;
2309
2310                         /*
2311                          * Find the version with the least serial
2312                          * number greater than ours.
2313                          */
2314                         least_greater = PREV(version, link);
2315                         if (least_greater == NULL)
2316                                 least_greater = rbtdb->current_version;
2317
2318                         INSIST(version->serial < least_greater->serial);
2319                         /*
2320                          * Is this the least open version?
2321                          */
2322                         if (version->serial == rbtdb->least_serial) {
2323                                 /*
2324                                  * Yes.  Install the new least open
2325                                  * version.
2326                                  */
2327                                 make_least_version(rbtdb,
2328                                                    least_greater,
2329                                                    &cleanup_list);
2330                         } else {
2331                                 /*
2332                                  * Add any unexecuted cleanups to
2333                                  * those of the least greater version.
2334                                  */
2335                                 APPENDLIST(least_greater->changed_list,
2336                                            version->changed_list,
2337                                            link);
2338                         }
2339                 } else if (version->serial == rbtdb->least_serial)
2340                         INSIST(EMPTY(version->changed_list));
2341                 UNLINK(rbtdb->open_versions, version, link);
2342         }
2343         least_serial = rbtdb->least_serial;
2344         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2345
2346         /*
2347          * Update the zone's secure status.
2348          */
2349         if (writer && commit && !IS_CACHE(rbtdb))
2350                 iszonesecure(db, version, rbtdb->origin_node);
2351
2352         if (cleanup_version != NULL) {
2353                 INSIST(EMPTY(cleanup_version->changed_list));
2354                 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2355                             sizeof(*cleanup_version));
2356         }
2357
2358         /*
2359          * Commit/rollback re-signed headers.
2360          */
2361         for (header = HEAD(resigned_list);
2362              header != NULL;
2363              header = HEAD(resigned_list)) {
2364                 nodelock_t *lock;
2365
2366                 ISC_LIST_UNLINK(resigned_list, header, link);
2367
2368                 lock = &rbtdb->node_locks[header->node->locknum].lock;
2369                 NODE_LOCK(lock, isc_rwlocktype_write);
2370                 if (rollback)
2371                         resign_insert(rbtdb, header->node->locknum, header);
2372                 decrement_reference(rbtdb, header->node, least_serial,
2373                                     isc_rwlocktype_write, isc_rwlocktype_none,
2374                                     ISC_FALSE);
2375                 NODE_UNLOCK(lock, isc_rwlocktype_write);
2376         }
2377
2378         if (!EMPTY(cleanup_list)) {
2379                 isc_event_t *event = NULL;
2380                 isc_rwlocktype_t tlock = isc_rwlocktype_none;
2381
2382                 if (rbtdb->task != NULL)
2383                         event = isc_event_allocate(rbtdb->common.mctx, NULL,
2384                                                    DNS_EVENT_RBTDEADNODES,
2385                                                    cleanup_dead_nodes_callback,
2386                                                    rbtdb, sizeof(isc_event_t));
2387                 if (event == NULL) {
2388                         /*
2389                          * We acquire a tree write lock here in order to make
2390                          * sure that stale nodes will be removed in
2391                          * decrement_reference().  If we didn't have the lock,
2392                          * those nodes could miss the chance to be removed
2393                          * until the server stops.  The write lock is
2394                          * expensive, but this event should be rare enough
2395                          * to justify the cost.
2396                          */
2397                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2398                         tlock = isc_rwlocktype_write;
2399                 }
2400
2401                 for (changed = HEAD(cleanup_list);
2402                      changed != NULL;
2403                      changed = next_changed) {
2404                         nodelock_t *lock;
2405
2406                         next_changed = NEXT(changed, link);
2407                         rbtnode = changed->node;
2408                         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2409
2410                         NODE_LOCK(lock, isc_rwlocktype_write);
2411                         /*
2412                          * This is a good opportunity to purge any dead nodes,
2413                          * so use it.
2414                          */
2415                         if (event == NULL)
2416                                 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2417
2418                         if (rollback)
2419                                 rollback_node(rbtnode, serial);
2420                         decrement_reference(rbtdb, rbtnode, least_serial,
2421                                             isc_rwlocktype_write, tlock,
2422                                             ISC_FALSE);
2423
2424                         NODE_UNLOCK(lock, isc_rwlocktype_write);
2425
2426                         isc_mem_put(rbtdb->common.mctx, changed,
2427                                     sizeof(*changed));
2428                 }
2429                 if (event != NULL) {
2430                         isc_refcount_increment(&rbtdb->references, NULL);
2431                         isc_task_send(rbtdb->task, &event);
2432                 } else
2433                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2434         }
2435
2436  end:
2437         *versionp = NULL;
2438 }
2439
2440 /*
2441  * Add the necessary magic for the wildcard name 'name'
2442  * to be found in 'rbtdb'.
2443  *
2444  * In order for wildcard matching to work correctly in
2445  * zone_find(), we must ensure that a node for the wildcarding
2446  * level exists in the database, and has its 'find_callback'
2447  * and 'wild' bits set.
2448  *
2449  * E.g. if the wildcard name is "*.sub.example." then we
2450  * must ensure that "sub.example." exists and is marked as
2451  * a wildcard level.
2452  */
2453 static isc_result_t
2454 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2455         isc_result_t result;
2456         dns_name_t foundname;
2457         dns_offsets_t offsets;
2458         unsigned int n;
2459         dns_rbtnode_t *node = NULL;
2460
2461         dns_name_init(&foundname, offsets);
2462         n = dns_name_countlabels(name);
2463         INSIST(n >= 2);
2464         n--;
2465         dns_name_getlabelsequence(name, 1, n, &foundname);
2466         result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2467         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2468                 return (result);
2469         if (result == ISC_R_SUCCESS)
2470                 node->nsec = DNS_RBT_NSEC_NORMAL;
2471         node->find_callback = 1;
2472         node->wild = 1;
2473         return (ISC_R_SUCCESS);
2474 }
2475
2476 static isc_result_t
2477 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2478         isc_result_t result;
2479         dns_name_t foundname;
2480         dns_offsets_t offsets;
2481         unsigned int n, l, i;
2482
2483         dns_name_init(&foundname, offsets);
2484         n = dns_name_countlabels(name);
2485         l = dns_name_countlabels(&rbtdb->common.origin);
2486         i = l + 1;
2487         while (i < n) {
2488                 dns_rbtnode_t *node = NULL;     /* dummy */
2489                 dns_name_getlabelsequence(name, n - i, i, &foundname);
2490                 if (dns_name_iswildcard(&foundname)) {
2491                         result = add_wildcard_magic(rbtdb, &foundname);
2492                         if (result != ISC_R_SUCCESS)
2493                                 return (result);
2494                         result = dns_rbt_addnode(rbtdb->tree, &foundname,
2495                                                  &node);
2496                         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2497                                 return (result);
2498                         if (result == ISC_R_SUCCESS)
2499                                 node->nsec = DNS_RBT_NSEC_NORMAL;
2500                 }
2501                 i++;
2502         }
2503         return (ISC_R_SUCCESS);
2504 }
2505
2506 static isc_result_t
2507 findnodeintree(dns_rbtdb_t *rbtdb, dns_rbt_t *tree, dns_name_t *name,
2508                isc_boolean_t create, dns_dbnode_t **nodep)
2509 {
2510         dns_rbtnode_t *node = NULL;
2511         dns_name_t nodename;
2512         isc_result_t result;
2513         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2514
2515         INSIST(tree == rbtdb->tree || tree == rbtdb->nsec3);
2516
2517         dns_name_init(&nodename, NULL);
2518         RWLOCK(&rbtdb->tree_lock, locktype);
2519         result = dns_rbt_findnode(tree, name, NULL, &node, NULL,
2520                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2521         if (result != ISC_R_SUCCESS) {
2522                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2523                 if (!create) {
2524                         if (result == DNS_R_PARTIALMATCH)
2525                                 result = ISC_R_NOTFOUND;
2526                         return (result);
2527                 }
2528                 /*
2529                  * It would be nice to try to upgrade the lock instead of
2530                  * unlocking then relocking.
2531                  */
2532                 locktype = isc_rwlocktype_write;
2533                 RWLOCK(&rbtdb->tree_lock, locktype);
2534                 node = NULL;
2535                 result = dns_rbt_addnode(tree, name, &node);
2536                 if (result == ISC_R_SUCCESS) {
2537 #ifdef BIND9
2538                         if (tree == rbtdb->tree && rbtdb->rpz_cidr != NULL) {
2539                                 dns_fixedname_t fnamef;
2540                                 dns_name_t *fname;
2541
2542                                 dns_fixedname_init(&fnamef);
2543                                 fname = dns_fixedname_name(&fnamef);
2544                                 dns_rbt_fullnamefromnode(node, fname);
2545                                 dns_rpz_cidr_addip(rbtdb->rpz_cidr, fname);
2546                         }
2547 #endif
2548                         dns_rbt_namefromnode(node, &nodename);
2549 #ifdef DNS_RBT_USEHASH
2550                         node->locknum = node->hashval % rbtdb->node_lock_count;
2551 #else
2552                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2553                                 rbtdb->node_lock_count;
2554 #endif
2555                         if (tree == rbtdb->tree) {
2556                                 add_empty_wildcards(rbtdb, name);
2557
2558                                 if (dns_name_iswildcard(name)) {
2559                                         result = add_wildcard_magic(rbtdb, name);
2560                                         if (result != ISC_R_SUCCESS) {
2561                                                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2562                                                 return (result);
2563                                         }
2564                                 }
2565                         }
2566                         if (tree == rbtdb->nsec3)
2567                                 node->nsec = DNS_RBT_NSEC_NSEC3;
2568                 } else if (result != ISC_R_EXISTS) {
2569                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2570                         return (result);
2571                 }
2572         }
2573
2574         if (tree == rbtdb->nsec3)
2575                 INSIST(node->nsec == DNS_RBT_NSEC_NSEC3);
2576
2577         reactivate_node(rbtdb, node, locktype);
2578         RWUNLOCK(&rbtdb->tree_lock, locktype);
2579
2580         *nodep = (dns_dbnode_t *)node;
2581
2582         return (ISC_R_SUCCESS);
2583 }
2584
2585 static isc_result_t
2586 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2587          dns_dbnode_t **nodep)
2588 {
2589         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2590
2591         REQUIRE(VALID_RBTDB(rbtdb));
2592
2593         return (findnodeintree(rbtdb, rbtdb->tree, name, create, nodep));
2594 }
2595
2596 static isc_result_t
2597 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2598               dns_dbnode_t **nodep)
2599 {
2600         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2601
2602         REQUIRE(VALID_RBTDB(rbtdb));
2603
2604         return (findnodeintree(rbtdb, rbtdb->nsec3, name, create, nodep));
2605 }
2606
2607 static isc_result_t
2608 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2609         rbtdb_search_t *search = arg;
2610         rdatasetheader_t *header, *header_next;
2611         rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2612         rdatasetheader_t *found;
2613         isc_result_t result;
2614         dns_rbtnode_t *onode;
2615
2616         /*
2617          * We only want to remember the topmost zone cut, since it's the one
2618          * that counts, so we'll just continue if we've already found a
2619          * zonecut.
2620          */
2621         if (search->zonecut != NULL)
2622                 return (DNS_R_CONTINUE);
2623
2624         found = NULL;
2625         result = DNS_R_CONTINUE;
2626         onode = search->rbtdb->origin_node;
2627
2628         NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2629                   isc_rwlocktype_read);
2630
2631         /*
2632          * Look for an NS or DNAME rdataset active in our version.
2633          */
2634         ns_header = NULL;
2635         dname_header = NULL;
2636         sigdname_header = NULL;
2637         for (header = node->data; header != NULL; header = header_next) {
2638                 header_next = header->next;
2639                 if (header->type == dns_rdatatype_ns ||
2640                     header->type == dns_rdatatype_dname ||
2641                     header->type == RBTDB_RDATATYPE_SIGDNAME) {
2642                         do {
2643                                 if (header->serial <= search->serial &&
2644                                     !IGNORE(header)) {
2645                                         /*
2646                                          * Is this a "this rdataset doesn't
2647                                          * exist" record?
2648                                          */
2649                                         if (NONEXISTENT(header))
2650                                                 header = NULL;
2651                                         break;
2652                                 } else
2653                                         header = header->down;
2654                         } while (header != NULL);
2655                         if (header != NULL) {
2656                                 if (header->type == dns_rdatatype_dname)
2657                                         dname_header = header;
2658                                 else if (header->type ==
2659                                            RBTDB_RDATATYPE_SIGDNAME)
2660                                         sigdname_header = header;
2661                                 else if (node != onode ||
2662                                          IS_STUB(search->rbtdb)) {
2663                                         /*
2664                                          * We've found an NS rdataset that
2665                                          * isn't at the origin node.  We check
2666                                          * that they're not at the origin node,
2667                                          * because otherwise we'd erroneously
2668                                          * treat the zone top as if it were
2669                                          * a delegation.
2670                                          */
2671                                         ns_header = header;
2672                                 }
2673                         }
2674                 }
2675         }
2676
2677         /*
2678          * Did we find anything?
2679          */
2680         if (!IS_CACHE(search->rbtdb) && !IS_STUB(search->rbtdb) &&
2681             ns_header != NULL) {
2682                 /*
2683                  * Note that NS has precedence over DNAME if both exist
2684                  * in a zone.  Otherwise DNAME take precedence over NS.
2685                  */
2686                 found = ns_header;
2687                 search->zonecut_sigrdataset = NULL;
2688         } else if (dname_header != NULL) {
2689                 found = dname_header;
2690                 search->zonecut_sigrdataset = sigdname_header;
2691         } else if (ns_header != NULL) {
2692                 found = ns_header;
2693                 search->zonecut_sigrdataset = NULL;
2694         }
2695
2696         if (found != NULL) {
2697                 /*
2698                  * We increment the reference count on node to ensure that
2699                  * search->zonecut_rdataset will still be valid later.
2700                  */
2701                 new_reference(search->rbtdb, node);
2702                 search->zonecut = node;
2703                 search->zonecut_rdataset = found;
2704                 search->need_cleanup = ISC_TRUE;
2705                 /*
2706                  * Since we've found a zonecut, anything beneath it is
2707                  * glue and is not subject to wildcard matching, so we
2708                  * may clear search->wild.
2709                  */
2710                 search->wild = ISC_FALSE;
2711                 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2712                         /*
2713                          * If the caller does not want to find glue, then
2714                          * this is the best answer and the search should
2715                          * stop now.
2716                          */
2717                         result = DNS_R_PARTIALMATCH;
2718                 } else {
2719                         dns_name_t *zcname;
2720
2721                         /*
2722                          * The search will continue beneath the zone cut.
2723                          * This may or may not be the best match.  In case it
2724                          * is, we need to remember the node name.
2725                          */
2726                         zcname = dns_fixedname_name(&search->zonecut_name);
2727                         RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2728                                       ISC_R_SUCCESS);
2729                         search->copy_name = ISC_TRUE;
2730                 }
2731         } else {
2732                 /*
2733                  * There is no zonecut at this node which is active in this
2734                  * version.
2735                  *
2736                  * If this is a "wild" node and the caller hasn't disabled
2737                  * wildcard matching, remember that we've seen a wild node
2738                  * in case we need to go searching for wildcard matches
2739                  * later on.
2740                  */
2741                 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2742                         search->wild = ISC_TRUE;
2743         }
2744
2745         NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2746                     isc_rwlocktype_read);
2747
2748         return (result);
2749 }
2750
2751 static inline void
2752 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2753               rdatasetheader_t *header, isc_stdtime_t now,
2754               dns_rdataset_t *rdataset)
2755 {
2756         unsigned char *raw;     /* RDATASLAB */
2757
2758         /*
2759          * Caller must be holding the node reader lock.
2760          * XXXJT: technically, we need a writer lock, since we'll increment
2761          * the header count below.  However, since the actual counter value
2762          * doesn't matter, we prioritize performance here.  (We may want to
2763          * use atomic increment when available).
2764          */
2765
2766         if (rdataset == NULL)
2767                 return;
2768
2769         new_reference(rbtdb, node);
2770
2771         INSIST(rdataset->methods == NULL);      /* We must be disassociated. */
2772
2773         rdataset->methods = &rdataset_methods;
2774         rdataset->rdclass = rbtdb->common.rdclass;
2775         rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2776         rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2777         rdataset->ttl = header->rdh_ttl - now;
2778         rdataset->trust = header->trust;
2779         if (NEGATIVE(header))
2780                 rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE;
2781         if (NXDOMAIN(header))
2782                 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2783         if (OPTOUT(header))
2784                 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2785         rdataset->private1 = rbtdb;
2786         rdataset->private2 = node;
2787         raw = (unsigned char *)header + sizeof(*header);
2788         rdataset->private3 = raw;
2789         rdataset->count = header->count++;
2790         if (rdataset->count == ISC_UINT32_MAX)
2791                 rdataset->count = 0;
2792
2793         /*
2794          * Reset iterator state.
2795          */
2796         rdataset->privateuint4 = 0;
2797         rdataset->private5 = NULL;
2798
2799         /*
2800          * Add noqname proof.
2801          */
2802         rdataset->private6 = header->noqname;
2803         if (rdataset->private6 != NULL)
2804                 rdataset->attributes |=  DNS_RDATASETATTR_NOQNAME;
2805         rdataset->private7 = header->closest;
2806         if (rdataset->private7 != NULL)
2807                 rdataset->attributes |=  DNS_RDATASETATTR_CLOSEST;
2808
2809         /*
2810          * Copy out re-signing information.
2811          */
2812         if (RESIGN(header)) {
2813                 rdataset->attributes |=  DNS_RDATASETATTR_RESIGN;
2814                 rdataset->resign = header->resign;
2815         } else
2816                 rdataset->resign = 0;
2817 }
2818
2819 static inline isc_result_t
2820 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2821                  dns_name_t *foundname, dns_rdataset_t *rdataset,
2822                  dns_rdataset_t *sigrdataset)
2823 {
2824         isc_result_t result;
2825         dns_name_t *zcname;
2826         rbtdb_rdatatype_t type;
2827         dns_rbtnode_t *node;
2828
2829         /*
2830          * The caller MUST NOT be holding any node locks.
2831          */
2832
2833         node = search->zonecut;
2834         type = search->zonecut_rdataset->type;
2835
2836         /*
2837          * If we have to set foundname, we do it before anything else.
2838          * If we were to set foundname after we had set nodep or bound the
2839          * rdataset, then we'd have to undo that work if dns_name_copy()
2840          * failed.  By setting foundname first, there's nothing to undo if
2841          * we have trouble.
2842          */
2843         if (foundname != NULL && search->copy_name) {
2844                 zcname = dns_fixedname_name(&search->zonecut_name);
2845                 result = dns_name_copy(zcname, foundname, NULL);
2846                 if (result != ISC_R_SUCCESS)
2847                         return (result);
2848         }
2849         if (nodep != NULL) {
2850                 /*
2851                  * Note that we don't have to increment the node's reference
2852                  * count here because we're going to use the reference we
2853                  * already have in the search block.
2854                  */
2855                 *nodep = node;
2856                 search->need_cleanup = ISC_FALSE;
2857         }
2858         if (rdataset != NULL) {
2859                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2860                           isc_rwlocktype_read);
2861                 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2862                               search->now, rdataset);
2863                 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2864                         bind_rdataset(search->rbtdb, node,
2865                                       search->zonecut_sigrdataset,
2866                                       search->now, sigrdataset);
2867                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2868                             isc_rwlocktype_read);
2869         }
2870
2871         if (type == dns_rdatatype_dname)
2872                 return (DNS_R_DNAME);
2873         return (DNS_R_DELEGATION);
2874 }
2875
2876 static inline isc_boolean_t
2877 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2878            dns_rbtnode_t *node)
2879 {
2880         unsigned char *raw;     /* RDATASLAB */
2881         unsigned int count, size;
2882         dns_name_t ns_name;
2883         isc_boolean_t valid = ISC_FALSE;
2884         dns_offsets_t offsets;
2885         isc_region_t region;
2886         rdatasetheader_t *header;
2887
2888         /*
2889          * No additional locking is required.
2890          */
2891
2892         /*
2893          * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
2894          * if it occurs at a zone cut, but is not valid below it.
2895          */
2896         if (type == dns_rdatatype_ns) {
2897                 if (node != search->zonecut) {
2898                         return (ISC_FALSE);
2899                 }
2900         } else if (type != dns_rdatatype_a &&
2901                    type != dns_rdatatype_aaaa &&
2902                    type != dns_rdatatype_a6) {
2903                 return (ISC_FALSE);
2904         }
2905
2906         header = search->zonecut_rdataset;
2907         raw = (unsigned char *)header + sizeof(*header);
2908         count = raw[0] * 256 + raw[1];
2909 #if DNS_RDATASET_FIXED
2910         raw += 2 + (4 * count);
2911 #else
2912         raw += 2;
2913 #endif
2914
2915         while (count > 0) {
2916                 count--;
2917                 size = raw[0] * 256 + raw[1];
2918 #if DNS_RDATASET_FIXED
2919                 raw += 4;
2920 #else
2921                 raw += 2;
2922 #endif
2923                 region.base = raw;
2924                 region.length = size;
2925                 raw += size;
2926                 /*
2927                  * XXX Until we have rdata structures, we have no choice but
2928                  * to directly access the rdata format.
2929                  */
2930                 dns_name_init(&ns_name, offsets);
2931                 dns_name_fromregion(&ns_name, &region);
2932                 if (dns_name_compare(&ns_name, name) == 0) {
2933                         valid = ISC_TRUE;
2934                         break;
2935                 }
2936         }
2937
2938         return (valid);
2939 }
2940
/*
 * Report whether the next active node after the chain's current position
 * lies at or below 'name'.
 *
 * Starting with the node that follows the current position of 'chain',
 * each node's header list is scanned under that node's read lock for a
 * header that is visible to this search (serial <= search->serial,
 * !IGNORE(), EXISTS()).  The scan stops at the first node that has one.
 * If that node's full name (prefix + origin) is a subdomain of 'name',
 * ISC_TRUE is returned.  In every other case -- no such node, a chain
 * error, or a failure to build the name -- the answer is ISC_FALSE.
 *
 * Note that 'chain' is advanced in place.
 */
2941 static inline isc_boolean_t
2942 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2943             dns_name_t *name)
2944 {
2945         dns_fixedname_t fnext;
2946         dns_fixedname_t forigin;
2947         dns_name_t *next;
2948         dns_name_t *origin;
2949         dns_name_t prefix;
2950         dns_rbtdb_t *rbtdb;
2951         dns_rbtnode_t *node;
2952         isc_result_t result;
2953         isc_boolean_t answer = ISC_FALSE;
2954         rdatasetheader_t *header;
2955
2956         rbtdb = search->rbtdb;
2957
2958         dns_name_init(&prefix, NULL);
2959         dns_fixedname_init(&fnext);
2960         next = dns_fixedname_name(&fnext);
2961         dns_fixedname_init(&forigin);
2962         origin = dns_fixedname_name(&forigin);
2963
             /* Skip the current node; the scan begins with its successor. */
2964         result = dns_rbtnodechain_next(chain, NULL, NULL);
2965         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2966                 node = NULL;
2967                 result = dns_rbtnodechain_current(chain, &prefix,
2968                                                   origin, &node);
2969                 if (result != ISC_R_SUCCESS)
2970                         break;
2971                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2972                           isc_rwlocktype_read);
2973                 for (header = node->data;
2974                      header != NULL;
2975                      header = header->next) {
2976                         if (header->serial <= search->serial &&
2977                             !IGNORE(header) && EXISTS(header))
2978                                 break;
2979                 }
2980                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2981                             isc_rwlocktype_read);
                     /* A non-NULL header means this node is active; stop here. */
2982                 if (header != NULL)
2983                         break;
2984                 result = dns_rbtnodechain_next(chain, NULL, NULL);
2985         }
             /*
              * 'result' is ISC_R_SUCCESS here only when the loop was left
              * with an active node; 'prefix' and 'origin' then describe it.
              */
2986         if (result == ISC_R_SUCCESS)
2987                 result = dns_name_concatenate(&prefix, origin, next, NULL);
2988         if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
2989                 answer = ISC_TRUE;
2990         return (answer);
2991 }
2992
/*
 * Decide whether 'qname', or one of its ancestors strictly below the
 * wildcard's parent, has an active node at or beneath it.
 *
 * Working on a private copy of search->chain, the function first walks
 * backwards (starting with the chain's current node) to the nearest node
 * carrying a header visible to this search and stores its full name in
 * 'prev'.  It then walks forwards from wherever that scan stopped and
 * stores the first such node's name in 'next'.  Finally 'qname' is
 * shortened one leading label at a time until it equals 'wname' without
 * its leading label; ISC_TRUE is returned as soon as 'prev' or 'next' is
 * a subdomain of one of the names tried, otherwise ISC_FALSE.
 *
 * The function name contains a transposition typo ("emtpy"); it is kept
 * because find_wildcard() calls it under this name.
 */
2993 static inline isc_boolean_t
2994 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2995         dns_fixedname_t fnext;
2996         dns_fixedname_t forigin;
2997         dns_fixedname_t fprev;
2998         dns_name_t *next;
2999         dns_name_t *origin;
3000         dns_name_t *prev;
3001         dns_name_t name;
3002         dns_name_t rname;
3003         dns_name_t tname;
3004         dns_rbtdb_t *rbtdb;
3005         dns_rbtnode_t *node;
3006         dns_rbtnodechain_t chain;
3007         isc_boolean_t check_next = ISC_TRUE;
3008         isc_boolean_t check_prev = ISC_TRUE;
3009         isc_boolean_t answer = ISC_FALSE;
3010         isc_result_t result;
3011         rdatasetheader_t *header;
3012         unsigned int n;
3013
3014         rbtdb = search->rbtdb;
3015
3016         dns_name_init(&name, NULL);
3017         dns_name_init(&tname, NULL);
3018         dns_name_init(&rname, NULL);
3019         dns_fixedname_init(&fnext);
3020         next = dns_fixedname_name(&fnext);
3021         dns_fixedname_init(&fprev);
3022         prev = dns_fixedname_name(&fprev);
3023         dns_fixedname_init(&forigin);
3024         origin = dns_fixedname_name(&forigin);
3025
3026         /*
3027          * Find if qname is at or below an empty node.
3028          * Use our own copy of the chain.
3029          */
3030
3031         chain = search->chain;
             /*
              * Backward scan: the current node is examined first, then each
              * predecessor, until one with an active header is found.
              */
3032         do {
3033                 node = NULL;
3034                 result = dns_rbtnodechain_current(&chain, &name,
3035                                                   origin, &node);
3036                 if (result != ISC_R_SUCCESS)
3037                         break;
3038                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3039                           isc_rwlocktype_read);
3040                 for (header = node->data;
3041                      header != NULL;
3042                      header = header->next) {
3043                         if (header->serial <= search->serial &&
3044                             !IGNORE(header) && EXISTS(header))
3045                                 break;
3046                 }
3047                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3048                             isc_rwlocktype_read);
3049                 if (header != NULL)
3050                         break;
3051                 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
3052         } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
             /*
              * Without an active predecessor (or if its name cannot be
              * built) 'prev' is unusable and is excluded from the test below.
              */
3053         if (result == ISC_R_SUCCESS)
3054                 result = dns_name_concatenate(&name, origin, prev, NULL);
3055         if (result != ISC_R_SUCCESS)
3056                 check_prev = ISC_FALSE;
3057
             /*
              * Forward scan: continues from wherever the backward scan left
              * the private chain, looking for the next active node.
              */
3058         result = dns_rbtnodechain_next(&chain, NULL, NULL);
3059         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3060                 node = NULL;
3061                 result = dns_rbtnodechain_current(&chain, &name,
3062                                                   origin, &node);
3063                 if (result != ISC_R_SUCCESS)
3064                         break;
3065                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3066                           isc_rwlocktype_read);
3067                 for (header = node->data;
3068                      header != NULL;
3069                      header = header->next) {
3070                         if (header->serial <= search->serial &&
3071                             !IGNORE(header) && EXISTS(header))
3072                                 break;
3073                 }
3074                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3075                             isc_rwlocktype_read);
3076                 if (header != NULL)
3077                         break;
3078                 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3079         }
3080         if (result == ISC_R_SUCCESS)
3081                 result = dns_name_concatenate(&name, origin, next, NULL);
3082         if (result != ISC_R_SUCCESS)
3083                 check_next = ISC_FALSE;
3084
3085         dns_name_clone(qname, &rname);
3086
3087         /*
3088          * Remove the wildcard label to find the terminal name.
3089          */
3090         n = dns_name_countlabels(wname);
3091         dns_name_getlabelsequence(wname, 1, n - 1, &tname);
3092
             /*
              * Test qname itself, then each successively shorter suffix of
              * it.  The loop ends once the suffix equals 'tname', which is
              * itself never tested.
              */
3093         do {
3094                 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3095                     (check_next && dns_name_issubdomain(next, &rname))) {
3096                         answer = ISC_TRUE;
3097                         break;
3098                 }
3099                 /*
3100                  * Remove the left hand label.
3101                  */
3102                 n = dns_name_countlabels(&rname);
3103                 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3104         } while (!dns_name_equal(&rname, &tname));
3105         return (answer);
3106 }
3107
/*
 * Look for a wildcard that covers 'qname', starting at *nodep and moving
 * up through the levels recorded in search->chain.
 *
 * Returns:
 *   ISC_R_SUCCESS   an active wildcard node was found and accepted;
 *                   *nodep has been replaced by that node.
 *   ISC_R_NOTFOUND  an active level node was reached before any usable
 *                   wildcard, or activeemtpynode() reported an active
 *                   name between the wildcard and 'qname'.
 *   other           a failure passed through from dns_name_concatenate()
 *                   or dns_rbt_findnode().
 */
3108 static inline isc_result_t
3109 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3110               dns_name_t *qname)
3111 {
3112         unsigned int i, j;
3113         dns_rbtnode_t *node, *level_node, *wnode;
3114         rdatasetheader_t *header;
3115         isc_result_t result = ISC_R_NOTFOUND;
3116         dns_name_t name;
3117         dns_name_t *wname;
3118         dns_fixedname_t fwname;
3119         dns_rbtdb_t *rbtdb;
3120         isc_boolean_t done, wild, active;
3121         dns_rbtnodechain_t wchain;
3122
3123         /*
3124          * Caller must be holding the tree lock and MUST NOT be holding
3125          * any node locks.
3126          */
3127
3128         /*
3129          * Examine each ancestor level.  If the level's wild bit
3130          * is set, then construct the corresponding wildcard name and
3131          * search for it.  If the wildcard node exists, and is active in
3132          * this version, we're done.  If not, then we next check to see
3133          * if the ancestor is active in this version.  If so, then there
3134          * can be no possible wildcard match and again we're done.  If not,
3135          * continue the search.
3136          */
3137
3138         rbtdb = search->rbtdb;
3139         i = search->chain.level_matches;
3140         done = ISC_FALSE;
3141         node = *nodep;
3142         do {
3143                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3144                           isc_rwlocktype_read);
3145
3146                 /*
3147                  * First we try to figure out if this node is active in
3148                  * the search's version.  We do this now, even though we
3149                  * may not need the information, because it simplifies the
3150                  * locking and code flow.
3151                  */
3152                 for (header = node->data;
3153                      header != NULL;
3154                      header = header->next) {
3155                         if (header->serial <= search->serial &&
3156                             !IGNORE(header) && EXISTS(header))
3157                                 break;
3158                 }
3159                 if (header != NULL)
3160                         active = ISC_TRUE;
3161                 else
3162                         active = ISC_FALSE;
3163
3164                 if (node->wild)
3165                         wild = ISC_TRUE;
3166                 else
3167                         wild = ISC_FALSE;
3168
3169                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3170                             isc_rwlocktype_read);
3171
3172                 if (wild) {
3173                         /*
3174                          * Construct the wildcard name for this level.
3175                          */
3176                         dns_name_init(&name, NULL);
3177                         dns_rbt_namefromnode(node, &name);
3178                         dns_fixedname_init(&fwname);
3179                         wname = dns_fixedname_name(&fwname);
3180                         result = dns_name_concatenate(dns_wildcardname, &name,
3181                                                       wname, NULL);
                             /*
                              * Extend the wildcard name with the name of each
                              * enclosing level, from levels[i - 1] down to
                              * levels[0].
                              */
3182                         j = i;
3183                         while (result == ISC_R_SUCCESS && j != 0) {
3184                                 j--;
3185                                 level_node = search->chain.levels[j];
3186                                 dns_name_init(&name, NULL);
3187                                 dns_rbt_namefromnode(level_node, &name);
3188                                 result = dns_name_concatenate(wname,
3189                                                               &name,
3190                                                               wname,
3191                                                               NULL);
3192                         }
3193                         if (result != ISC_R_SUCCESS)
3194                                 break;
3195
3196                         wnode = NULL;
3197                         dns_rbtnodechain_init(&wchain, NULL);
3198                         result = dns_rbt_findnode(rbtdb->tree, wname,
3199                                                   NULL, &wnode, &wchain,
3200                                                   DNS_RBTFIND_EMPTYDATA,
3201                                                   NULL, NULL);
3202                         if (result == ISC_R_SUCCESS) {
3203                                 nodelock_t *lock;
3204
3205                                 /*
3206                                  * We have found the wildcard node.  If it
3207                                  * is active in the search's version, we're
3208                                  * done.
3209                                  */
3210                                 lock = &rbtdb->node_locks[wnode->locknum].lock;
3211                                 NODE_LOCK(lock, isc_rwlocktype_read);
3212                                 for (header = wnode->data;
3213                                      header != NULL;
3214                                      header = header->next) {
3215                                         if (header->serial <= search->serial &&
3216                                             !IGNORE(header) && EXISTS(header))
3217                                                 break;
3218                                 }
3219                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
                                     /*
                                      * The wildcard also counts when it has no
                                      * active data of its own but activeempty()
                                      * finds an active node beneath it.
                                      */
3220                                 if (header != NULL ||
3221                                     activeempty(search, &wchain, wname)) {
                                             /*
                                              * An active name between the wildcard
                                              * and qname rules the wildcard out.
                                              */
3222                                         if (activeemtpynode(search, qname,
3223                                                             wname)) {
3224                                                 return (ISC_R_NOTFOUND);
3225                                         }
3226                                         /*
3227                                          * The wildcard node is active!
3228                                          *
3229                                          * Note: result is still ISC_R_SUCCESS
3230                                          * so we don't have to set it.
3231                                          */
3232                                         *nodep = wnode;
3233                                         break;
3234                                 }
3235                         } else if (result != ISC_R_NOTFOUND &&
3236                                    result != DNS_R_PARTIALMATCH) {
3237                                 /*
3238                                  * An error has occurred.  Bail out.
3239                                  */
3240                                 break;
3241                         }
3242                 }
3243
3244                 if (active) {
3245                         /*
3246                          * The level node is active.  Any wildcarding
3247                          * present at higher levels has no
3248                          * effect and we're done.
3249                          */
3250                         result = ISC_R_NOTFOUND;
3251                         break;
3252                 }
3253
                     /*
                      * NOTE(review): if the loop ever ended through 'done'
                      * after a wildcard lookup at some level, 'result' would
                      * still hold that lookup's value (possibly
                      * ISC_R_SUCCESS or DNS_R_PARTIALMATCH) rather than
                      * ISC_R_NOTFOUND.  Presumably the topmost level is
                      * always active so the 'active' exit above is taken
                      * first -- confirm.
                      */
3254                 if (i > 0) {
3255                         i--;
3256                         node = search->chain.levels[i];
3257                 } else
3258                         done = ISC_TRUE;
3259         } while (!done);
3260
3261         return (result);
3262 }
3263
/*
 * Check whether any NSEC3 record stored after 'header' uses the same
 * parameters as the search's version.
 *
 * The bytes that follow the header are read as a 16-bit big-endian record
 * count followed by the records, each preceded by a 16-bit big-endian
 * length.  When DNS_RDATASET_FIXED is set, an extra 4 * count bytes after
 * the count and an extra 2 bytes after each length are skipped
 * (presumably ordering information -- confirm against the slab format).
 *
 * Returns ISC_TRUE at the first record whose hash, iterations, salt length
 * and salt all equal those of search->rbtversion; ISC_FALSE if none does.
 * 'header' must be of type NSEC3 (enforced by REQUIRE).
 */
3264 static isc_boolean_t
3265 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3266 {
3267         dns_rdata_t rdata = DNS_RDATA_INIT;
3268         dns_rdata_nsec3_t nsec3;
3269         unsigned char *raw;                     /* RDATASLAB */
3270         unsigned int rdlen, count;
3271         isc_region_t region;
3272         isc_result_t result;
3273
3274         REQUIRE(header->type == dns_rdatatype_nsec3);
3275
3276         raw = (unsigned char *)header + sizeof(*header);
3277         count = raw[0] * 256 + raw[1]; /* count */
3278 #if DNS_RDATASET_FIXED
3279         raw += count * 4 + 2;
3280 #else
3281         raw += 2;
3282 #endif
3283         while (count-- > 0) {
3284                 rdlen = raw[0] * 256 + raw[1];
3285 #if DNS_RDATASET_FIXED
3286                 raw += 4;
3287 #else
3288                 raw += 2;
3289 #endif
3290                 region.base = raw;
3291                 region.length = rdlen;
3292                 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3293                                      dns_rdatatype_nsec3, &region);
                     /* Advance to the next record before inspecting this one. */
3294                 raw += rdlen;
3295                 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3296                 INSIST(result == ISC_R_SUCCESS);
3297                 if (nsec3.hash == search->rbtversion->hash &&
3298                     nsec3.iterations == search->rbtversion->iterations &&
3299                     nsec3.salt_length == search->rbtversion->salt_length &&
3300                     memcmp(nsec3.salt, search->rbtversion->salt,
3301                            nsec3.salt_length) == 0)
3302                         return (ISC_TRUE);
                     /* 'rdata' is reused for the next record. */
3303                 dns_rdata_reset(&rdata);
3304         }
3305         return (ISC_FALSE);
3306 }
3307
3308 /*
3309  * Find node of the NSEC/NSEC3 record that is 'name'.
      *
      * Moves the search back to the previous candidate node and returns it
      * in *nodep, with 'name' and 'origin' updated to identify it.
      *
      * For NSEC3 ('type' == dns_rdatatype_nsec3) this is simply the
      * predecessor of the current position of search->chain; 'nsecchain'
      * and 'firstp' are not touched on that path.
      *
      * Otherwise the auxiliary NSEC tree (search->rbtdb->nsec) is walked
      * backwards through 'nsecchain', and each name found there is looked
      * up in the main tree, repositioning search->chain.  '*firstp' tells
      * whether 'nsecchain' still has to be initialised; it is cleared on
      * the first pass.
      *
      * Returns ISC_R_SUCCESS when a main-tree node was found, DNS_R_BADDB
      * when the main-tree lookup fails with anything other than
      * DNS_R_PARTIALMATCH or ISC_R_NOTFOUND, and otherwise whatever result
      * stopped the backward walk (ISC_R_NOTFOUND from the initial
      * positioning is reported as ISC_R_NOMORE).
3310  */
3311 static inline isc_result_t
3312 previous_closest_nsec(dns_rdatatype_t type, rbtdb_search_t *search,
3313                     dns_name_t *name, dns_name_t *origin,
3314                     dns_rbtnode_t **nodep, dns_rbtnodechain_t *nsecchain,
3315                     isc_boolean_t *firstp)
3316 {
3317         dns_fixedname_t ftarget;
3318         dns_name_t *target;
3319         dns_rbtnode_t *nsecnode;
3320         isc_result_t result;
3321
3322         REQUIRE(nodep != NULL && *nodep == NULL);
3323
3324         if (type == dns_rdatatype_nsec3) {
3325                 result = dns_rbtnodechain_prev(&search->chain, NULL, NULL);
3326                 if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN)
3327                         return (result);
3328                 result = dns_rbtnodechain_current(&search->chain, name, origin,
3329                                                   nodep);
3330                 return (result);
3331         }
3332
3333         dns_fixedname_init(&ftarget);
3334         target = dns_fixedname_name(&ftarget);
3335
3336         for (;;) {
3337                 if (*firstp) {
3338                         /*
3339                          * Construct the name of the second node to check.
3340                          * It is the first node sought in the NSEC tree.
3341                          */
3342                         *firstp = ISC_FALSE;
3343                         dns_rbtnodechain_init(nsecchain, NULL);
3344                         result = dns_name_concatenate(name, origin,
3345                                                       target, NULL);
3346                         if (result != ISC_R_SUCCESS)
3347                                 return (result);
3348                         nsecnode = NULL;
3349                         result = dns_rbt_findnode(search->rbtdb->nsec,
3350                                                   target, NULL,
3351                                                   &nsecnode, nsecchain,
3352                                                   DNS_RBTFIND_NOOPTIONS,
3353                                                   NULL, NULL);
3354                         if (result == ISC_R_SUCCESS) {
3355                                 /*
3356                                  * Since this was the first loop, finding the
3357                                  * name in the NSEC tree implies that the first
3358                                  * node checked in the main tree had an
3359                                  * unacceptable NSEC record.
3360                                  * Try the previous node in the NSEC tree.
3361                                  */
3362                                 result = dns_rbtnodechain_prev(nsecchain,
3363                                                                name, origin);
3364                                 if (result == DNS_R_NEWORIGIN)
3365                                         result = ISC_R_SUCCESS;
3366                         } else if (result == ISC_R_NOTFOUND ||
3367                                    result == DNS_R_PARTIALMATCH) {
                                     /*
                                      * No exact match: use whatever node the
                                      * chain was left on by the lookup.
                                      */
3368                                 result = dns_rbtnodechain_current(nsecchain,
3369                                                         name, origin, NULL);
3370                                 if (result == ISC_R_NOTFOUND)
3371                                         result = ISC_R_NOMORE;
3372                         }
3373                 } else {
3374                         /*
3375                          * This is a second or later trip through the auxiliary
3376                          * tree for the name of a third or earlier NSEC node in
3377                          * the main tree.  Previous trips through the NSEC tree
3378                          * must have found nodes in the main tree with NSEC
3379                          * records.  Perhaps they lacked signature records.
3380                          */
3381                         result = dns_rbtnodechain_prev(nsecchain, name, origin);
3382                         if (result == DNS_R_NEWORIGIN)
3383                                 result = ISC_R_SUCCESS;
3384                 }
3385                 if (result != ISC_R_SUCCESS)
3386                         return (result);
3387
3388                 /*
3389                  * Construct the name to seek in the main tree.
3390                  */
3391                 result = dns_name_concatenate(name, origin, target, NULL);
3392                 if (result != ISC_R_SUCCESS)
3393                         return (result);
3394
3395                 *nodep = NULL;
3396                 result = dns_rbt_findnode(search->rbtdb->tree, target, NULL,
3397                                           nodep, &search->chain,
3398                                           DNS_RBTFIND_NOOPTIONS, NULL, NULL);
3399                 if (result == ISC_R_SUCCESS)
3400                         return (result);
3401
3402                 /*
3403                  * There should always be a node in the main tree with the
3404                  * same name as the node in the auxiliary NSEC tree, except for
3405                  * nodes in the auxiliary tree that are awaiting deletion.
3406                  */
3407                 if (result != DNS_R_PARTIALMATCH && result != ISC_R_NOTFOUND) {
3408                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
3409                                       DNS_LOGMODULE_CACHE, ISC_LOG_ERROR,
3410                                       "previous_closest_nsec(): %s",
3411                                       isc_result_totext(result));
3412                         return (DNS_R_BADDB);
3413                 }
                     /*
                      * The name was missing from the main tree; loop to try
                      * the next earlier entry of the NSEC tree.
                      */
3414         }
3415 }
3416
3417 /*
3418  * Find the NSEC/NSEC3 which is at or before the current point on the
3419  * search chain.  For NSEC3 records only NSEC3 records that match the
3420  * current NSEC3PARAM record are considered.
      *
      * 'tree' selects the record type: NSEC3 when it is the database's
      * nsec3 tree, NSEC otherwise.  On success 'foundname' receives the
      * owner name, 'rdataset' is bound to the NSEC/NSEC3 set, 'sigrdataset'
      * is bound to its signature set when one was found, and, if 'nodep' is
      * not NULL, *nodep receives a new reference to the node.  A signature
      * is required only when 'secure' is dns_db_secure.
      *
      * Returns ISC_R_SUCCESS on success, DNS_R_BADDB when the database
      * looks inconsistent (only one of the record and its signature is
      * present, or no record could be found at all), or a result passed
      * through from the chain and name helpers.
3421  */
3422 static inline isc_result_t
3423 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3424                   dns_name_t *foundname, dns_rdataset_t *rdataset,
3425                   dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3426                   dns_db_secure_t secure)
3427 {
3428         dns_rbtnode_t *node, *prevnode;
3429         rdatasetheader_t *header, *header_next, *found, *foundsig;
3430         dns_rbtnodechain_t nsecchain;
3431         isc_boolean_t empty_node;
3432         isc_result_t result;
3433         dns_fixedname_t fname, forigin;
3434         dns_name_t *name, *origin;
3435         dns_rdatatype_t type;
3436         rbtdb_rdatatype_t sigtype;
3437         isc_boolean_t wraps;
3438         isc_boolean_t first = ISC_TRUE;
3439         isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3440
             /*
              * Only the NSEC3 search is allowed to wrap around to the last
              * node of the tree (see the ISC_R_NOMORE handling below).
              */
3441         if (tree == search->rbtdb->nsec3) {
3442                 type = dns_rdatatype_nsec3;
3443                 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3444                 wraps = ISC_TRUE;
3445         } else {
3446                 type = dns_rdatatype_nsec;
3447                 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3448                 wraps = ISC_FALSE;
3449         }
3450
3451         /*
3452          * Use the auxiliary tree only starting with the second node in the
3453          * hope that the original node will be right much of the time.
3454          */
3455         dns_fixedname_init(&fname);
3456         name = dns_fixedname_name(&fname);
3457         dns_fixedname_init(&forigin);
3458         origin = dns_fixedname_name(&forigin);
3459  again:
3460         node = NULL;
3461         prevnode = NULL;
3462         result = dns_rbtnodechain_current(&search->chain, name, origin, &node);
3463         if (result != ISC_R_SUCCESS)
3464                 return (result);
3465         do {
                     /*
                      * The node's read lock is held from here until the
                      * NODE_UNLOCK at the bottom of the loop body, including
                      * across the previous_closest_nsec() calls.
                      */
3466                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3467                           isc_rwlocktype_read);
3468                 found = NULL;
3469                 foundsig = NULL;
3470                 empty_node = ISC_TRUE;
3471                 for (header = node->data;
3472                      header != NULL;
3473                      header = header_next) {
                             /*
                              * Saved up front because 'header' is reassigned
                              * while following its 'down' chain below.
                              */
3474                         header_next = header->next;
3475                         /*
3476                          * Look for an active, extant NSEC or RRSIG NSEC.
3477                          */
3478                         do {
3479                                 if (header->serial <= search->serial &&
3480                                     !IGNORE(header)) {
3481                                         /*
3482                                          * Is this a "this rdataset doesn't
3483                                          * exist" record?
3484                                          */
3485                                         if (NONEXISTENT(header))
3486                                                 header = NULL;
3487                                         break;
3488                                 } else
3489                                         header = header->down;
3490                         } while (header != NULL);
3491                         if (header != NULL) {
3492                                 /*
3493                                  * We now know that there is at least one
3494                                  * active rdataset at this node.
3495                                  */
3496                                 empty_node = ISC_FALSE;
                                     /*
                                      * Stop scanning as soon as both the
                                      * record and its signature are in hand.
                                      */
3497                                 if (header->type == type) {
3498                                         found = header;
3499                                         if (foundsig != NULL)
3500                                                 break;
3501                                 } else if (header->type == sigtype) {
3502                                         foundsig = header;
3503                                         if (found != NULL)
3504                                                 break;
3505                                 }
3506                         }
3507                 }
3508                 if (!empty_node) {
3509                         if (found != NULL && search->rbtversion->havensec3 &&
3510                             found->type == dns_rdatatype_nsec3 &&
3511                             !matchparams(found, search)) {
                                     /*
                                      * An NSEC3 with parameters other than the
                                      * version's is skipped as if the node were
                                      * empty.  'type' is NSEC3 here, so
                                      * previous_closest_nsec() does not use the
                                      * two NULL arguments.
                                      */
3512                                 empty_node = ISC_TRUE;
3513                                 found = NULL;
3514                                 foundsig = NULL;
3515                                 result = previous_closest_nsec(type, search,
3516                                                                name, origin,
3517                                                                &prevnode, NULL,
3518                                                                NULL);
3519                         } else if (found != NULL &&
3520                                    (foundsig != NULL || !need_sig)) {
3521                                 /*
3522                                  * We've found the right NSEC/NSEC3 record.
3523                                  *
3524                                  * Note: for this to really be the right
3525                                  * NSEC record, it's essential that the NSEC
3526                                  * records of any nodes obscured by a zone
3527                                  * cut have been removed; we assume this is
3528                                  * the case.
3529                                  */
3530                                 result = dns_name_concatenate(name, origin,
3531                                                               foundname, NULL);
3532                                 if (result == ISC_R_SUCCESS) {
3533                                         if (nodep != NULL) {
3534                                                 new_reference(search->rbtdb,
3535                                                               node);
3536                                                 *nodep = node;
3537                                         }
3538                                         bind_rdataset(search->rbtdb, node,
3539                                                       found, search->now,
3540                                                       rdataset);
3541                                         if (foundsig != NULL)
3542                                                 bind_rdataset(search->rbtdb,
3543                                                               node,
3544                                                               foundsig,
3545                                                               search->now,
3546                                                               sigrdataset);
3547                                 }
3548                         } else if (found == NULL && foundsig == NULL) {
3549                                 /*
3550                                  * This node is active, but has no NSEC or
3551                                  * RRSIG NSEC.  That means it's glue or
3552                                  * other obscured zone data that isn't
3553                                  * relevant for our search.  Treat the
3554                                  * node as if it were empty and keep looking.
3555                                  */
3556                                 empty_node = ISC_TRUE;
3557                                 result = previous_closest_nsec(type, search,
3558                                                                name, origin,
3559                                                                &prevnode,
3560                                                                &nsecchain,
3561                                                                &first);
3562                         } else {
3563                                 /*
3564                                  * We found an active node, but either the
3565                                  * NSEC or the RRSIG NSEC is missing.  This
3566                                  * shouldn't happen.
3567                                  */
3568                                 result = DNS_R_BADDB;
3569                         }
3570                 } else {
3571                         /*
3572                          * This node isn't active.  We've got to keep
3573                          * looking.
3574                          */
3575                         result = previous_closest_nsec(type, search,
3576                                                        name, origin, &prevnode,
3577                                                        &nsecchain, &first);
3578                 }
3579                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3580                             isc_rwlocktype_read);
                     /* Continue with the node the helper moved back to, if any. */
3581                 node = prevnode;
3582                 prevnode = NULL;
3583         } while (empty_node && result == ISC_R_SUCCESS);
3584
             /*
              * 'first' is cleared only once previous_closest_nsec() has
              * initialised 'nsecchain', so the chain is released only then.
              */
3585         if (!first)
3586                 dns_rbtnodechain_invalidate(&nsecchain);
3587
             /*
              * NSEC3 only: on reaching the start of the tree, restart once
              * from its last node.  'wraps' is cleared so this happens at
              * most one time.
              */
3588         if (result == ISC_R_NOMORE && wraps) {
3589                 result = dns_rbtnodechain_last(&search->chain, tree,
3590                                                NULL, NULL);
3591                 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3592                         wraps = ISC_FALSE;
3593                         goto again;
3594                 }
3595         }
3596
3597         /*
3598          * If the result is ISC_R_NOMORE, then we got to the beginning of
3599          * the database and didn't find a NSEC record.  This shouldn't
3600          * happen.
3601          */
3602         if (result == ISC_R_NOMORE)
3603                 result = DNS_R_BADDB;
3604
3605         return (result);
3606 }
3607
/*
 * zone_find: look up 'name' / 'type' in a zone database.
 *
 * The search is made against 'version', or against the current version
 * when 'version' is NULL (it is then attached with currentversion() and
 * released with closeversion() before returning).  The main tree is
 * searched unless DNS_DBFIND_FORCENSEC3 is set in 'options', in which case
 * the NSEC3 tree is searched.  'now' is ignored.  The tree lock is held
 * for reading from the start of the search until tree_exit.
 *
 * Outputs:
 *   - '*nodep' is only written when 'nodep' is non-NULL; on the paths
 *     handled directly here it receives 'node' with a reference held.
 *   - 'foundname' is handed to dns_rbt_findnode(); on a wildcard match
 *     'name' is copied into it, and DNS_NAMEATTR_WILDCARD is set on it
 *     when the answer is taken from the matched wildcard node.
 *   - 'rdataset' / 'sigrdataset' are bound to the answer and its RRSIG
 *     (not for ANY queries), or to the NSEC and its RRSIG on the NXRRSET
 *     path when the zone is secure without NSEC3 or DNS_DBFIND_FORCENSEC
 *     is set.
 *
 * Results assigned directly in this function:
 *   ISC_R_SUCCESS     'type' was found; also used for NSEC, NSEC3 or KEY
 *                     found at the zone cut node itself.
 *   DNS_R_CNAME       a CNAME was found instead of 'type' (non-ANY query).
 *   DNS_R_ZONECUT     ANY query answered at the zone cut node.
 *   DNS_R_GLUE        data found at or beneath a zone cut.
 *   DNS_R_NXRRSET     the node exists but 'type' does not.
 *   DNS_R_NXDOMAIN /
 *   DNS_R_EMPTYNAME   no usable node; which one is chosen by activeempty().
 *   DNS_R_EMPTYWILD   wildcard match in a secure zone where the node lacks
 *                     a NSEC or its RRSIG and find_closest_nsec() succeeded.
 *   DNS_R_BADDB       a NSEC (or its signature) that must exist is missing.
 * Any other value is passed through unchanged from dns_rbt_findnode(),
 * find_wildcard(), dns_name_copy(), find_closest_nsec() or
 * setup_delegation().
 */
3608 static isc_result_t
3609 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3610           dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3611           dns_dbnode_t **nodep, dns_name_t *foundname,
3612           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3613 {
3614         dns_rbtnode_t *node = NULL;
3615         isc_result_t result;
3616         rbtdb_search_t search;
3617         isc_boolean_t cname_ok = ISC_TRUE;
3618         isc_boolean_t close_version = ISC_FALSE;
3619         isc_boolean_t maybe_zonecut = ISC_FALSE;
3620         isc_boolean_t at_zonecut = ISC_FALSE;
3621         isc_boolean_t wild;
3622         isc_boolean_t empty_node;
3623         rdatasetheader_t *header, *header_next, *found, *nsecheader;
3624         rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3625         rbtdb_rdatatype_t sigtype;
3626         isc_boolean_t active;
3627         dns_rbtnodechain_t chain;
3628         nodelock_t *lock;
3629         dns_rbt_t *tree;
3630
3631         search.rbtdb = (dns_rbtdb_t *)db;
3632
3633         REQUIRE(VALID_RBTDB(search.rbtdb));
3634         INSIST(version == NULL ||
3635                ((rbtdb_version_t *)version)->rbtdb == (dns_rbtdb_t *)db);
3636
3637         /*
3638          * We don't care about 'now'.
3639          */
3640         UNUSED(now);
3641
3642         /*
3643          * If the caller didn't supply a version, attach to the current
3644          * version.
3645          */
3646         if (version == NULL) {
3647                 currentversion(db, &version);
3648                 close_version = ISC_TRUE;
3649         }
3650
             /*
              * Set up the search state that is shared with the find
              * callback and the helper routines called below.
              */
3651         search.rbtversion = version;
3652         search.serial = search.rbtversion->serial;
3653         search.options = options;
3654         search.copy_name = ISC_FALSE;
3655         search.need_cleanup = ISC_FALSE;
3656         search.wild = ISC_FALSE;
3657         search.zonecut = NULL;
3658         dns_fixedname_init(&search.zonecut_name);
3659         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3660         search.now = 0;
3661
3662         /*
3663          * 'wild' will be true iff. we've matched a wildcard.
3664          */
3665         wild = ISC_FALSE;
3666
3667         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3668
3669         /*
3670          * Search down from the root of the tree.  If, while going down, we
3671          * encounter a callback node, zone_zonecut_callback() will search the
3672          * rdatasets at the zone cut for active DNAME or NS rdatasets.
3673          */
3674         tree =  (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3675                                                          search.rbtdb->tree;
3676         result = dns_rbt_findnode(tree, name, foundname, &node,
3677                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
3678                                   zone_zonecut_callback, &search);
3679
3680         if (result == DNS_R_PARTIALMATCH) {
3681         partial_match:
                     /*
                      * This label is also entered by goto from further down,
                      * with the node lock already released, when an exact
                      * match turns out to have no active rdatasets in this
                      * version or holds an NSEC3 record that fails
                      * matchparams().
                      */
3682                 if (search.zonecut != NULL) {
3683                     result = setup_delegation(&search, nodep, foundname,
3684                                               rdataset, sigrdataset);
3685                     goto tree_exit;
3686                 }
3687
3688                 if (search.wild) {
3689                         /*
3690                          * At least one of the levels in the search chain
3691                          * potentially has a wildcard.  For each such level,
3692                          * we must see if there's a matching wildcard active
3693                          * in the current version.
3694                          */
3695                         result = find_wildcard(&search, &node, name);
3696                         if (result == ISC_R_SUCCESS) {
3697                                 result = dns_name_copy(name, foundname, NULL);
3698                                 if (result != ISC_R_SUCCESS)
3699                                         goto tree_exit;
3700                                 wild = ISC_TRUE;
3701                                 goto found;
3702                         }
3703                         else if (result != ISC_R_NOTFOUND)
3704                                 goto tree_exit;
3705                 }
3706
                     /*
                      * activeempty() is handed a copy of the chain.
                      * NOTE(review): presumably so that it can move along
                      * the chain without disturbing search.chain, which
                      * find_closest_nsec() below works from -- confirm.
                      */
3707                 chain = search.chain;
3708                 active = activeempty(&search, &chain, name);
3709
3710                 /*
3711                  * If we're here, then the name does not exist, is not
3712                  * beneath a zonecut, and there's no matching wildcard.
3713                  */
3714                 if ((search.rbtversion->secure == dns_db_secure &&
3715                      !search.rbtversion->havensec3) ||
3716                     (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3717                     (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3718                 {
3719                         result = find_closest_nsec(&search, nodep, foundname,
3720                                                    rdataset, sigrdataset, tree,
3721                                                    search.rbtversion->secure);
3722                         if (result == ISC_R_SUCCESS)
3723                                 result = active ? DNS_R_EMPTYNAME :
3724                                                   DNS_R_NXDOMAIN;
3725                 } else
3726                         result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3727                 goto tree_exit;
3728         } else if (result != ISC_R_SUCCESS)
3729                 goto tree_exit;
3730
3731  found:
3732         /*
3733          * We have found a node whose name is the desired name, or we
3734          * have matched a wildcard.
3735          */
3736
3737         if (search.zonecut != NULL) {
3738                 /*
3739                  * If we're beneath a zone cut, we don't want to look for
3740                  * CNAMEs because they're not legitimate zone glue.
3741                  */
3742                 cname_ok = ISC_FALSE;
3743         } else {
3744                 /*
3745                  * The node may be a zone cut itself.  If it might be one,
3746                  * make sure we check for it later.
3747                  *
3748                  * DS records live above the zone cut in ordinary zones so
3749                  * we want to ignore any referral.
3750                  *
3751                  * Stub zones don't have anything "above" the delegation so
3752                  * we always return a referral.
3753                  */
3754                 if (node->find_callback &&
3755                     ((node != search.rbtdb->origin_node &&
3756                       !dns_rdatatype_atparent(type)) ||
3757                      IS_STUB(search.rbtdb)))
3758                         maybe_zonecut = ISC_TRUE;
3759         }
3760
3761         /*
3762          * Certain DNSSEC types are not subject to CNAME matching
3763          * (RFC4035, section 2.5 and RFC3007).
3764          *
3765          * We don't check for RRSIG, because we don't store RRSIG records
3766          * directly.
3767          */
3768         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3769                 cname_ok = ISC_FALSE;
3770
3771         /*
3772          * We now go looking for rdata...
3773          */
3774
3775         lock = &search.rbtdb->node_locks[node->locknum].lock;
3776         NODE_LOCK(lock, isc_rwlocktype_read);
3777
3778         found = NULL;
3779         foundsig = NULL;
3780         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3781         nsecheader = NULL;
3782         nsecsig = NULL;
3783         cnamesig = NULL;
3784         empty_node = ISC_TRUE;
3785         for (header = node->data; header != NULL; header = header_next) {
3786                 header_next = header->next;
3787                 /*
3788                  * Look for an active, extant rdataset.
3789                  */
                     /*
                      * Follow header->down until a header is reached whose
                      * serial is not newer than search.serial and which is
                      * not marked IGNORE.  NOTE(review): the 'down' list
                      * presumably holds the other versions of this
                      * rdataset -- confirm.
                      */
3790                 do {
3791                         if (header->serial <= search.serial &&
3792                             !IGNORE(header)) {
3793                                 /*
3794                                  * Is this a "this rdataset doesn't
3795                                  * exist" record?
3796                                  */
3797                                 if (NONEXISTENT(header))
3798                                         header = NULL;
3799                                 break;
3800                         } else
3801                                 header = header->down;
3802                 } while (header != NULL);
3803                 if (header != NULL) {
3804                         /*
3805                          * We now know that there is at least one active
3806                          * rdataset at this node.
3807                          */
3808                         empty_node = ISC_FALSE;
3809
3810                         /*
3811                          * Do special zone cut handling, if requested.
3812                          */
3813                         if (maybe_zonecut &&
3814                             header->type == dns_rdatatype_ns) {
3815                                 /*
3816                                  * We increment the reference count on node to
3817                                  * ensure that search->zonecut_rdataset will
3818                                  * still be valid later.
3819                                  */
3820                                 new_reference(search.rbtdb, node);
3821                                 search.zonecut = node;
3822                                 search.zonecut_rdataset = header;
3823                                 search.zonecut_sigrdataset = NULL;
3824                                 search.need_cleanup = ISC_TRUE;
3825                                 maybe_zonecut = ISC_FALSE;
3826                                 at_zonecut = ISC_TRUE;
3827                                 /*
3828                                  * It is not clear if KEY should still be
3829                                  * allowed at the parent side of the zone
3830                                  * cut or not.  It is needed for RFC3007
3831                                  * validated updates.
3832                                  */
3833                                 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3834                                     && type != dns_rdatatype_nsec
3835                                     && type != dns_rdatatype_key) {
3836                                         /*
3837                                          * Glue is not OK, but any answer we
3838                                          * could return would be glue.  Return
3839                                          * the delegation.
3840                                          */
3841                                         found = NULL;
3842                                         break;
3843                                 }
                                     /*
                                      * The answer and its signature may
                                      * already have been seen before this
                                      * NS header; if so there is nothing
                                      * more to look for.
                                      */
3844                                 if (found != NULL && foundsig != NULL)
3845                                         break;
3846                         }
3847
3848
3849                         /*
3850                          * If the NSEC3 record doesn't match the chain
3851                          * we are using behave as if it isn't here.
3852                          */
3853                         if (header->type == dns_rdatatype_nsec3 &&
3854                            !matchparams(header, &search)) {
                                     /*
                                      * The partial_match path runs without
                                      * the node lock, so drop it first.
                                      */
3855                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3856                                 goto partial_match;
3857                         }
3858                         /*
3859                          * If we found a type we were looking for,
3860                          * remember it.
3861                          */
3862                         if (header->type == type ||
3863                             type == dns_rdatatype_any ||
3864                             (header->type == dns_rdatatype_cname &&
3865                              cname_ok)) {
3866                                 /*
3867                                  * We've found the answer!
3868                                  */
3869                                 found = header;
3870                                 if (header->type == dns_rdatatype_cname &&
3871                                     cname_ok) {
3872                                         /*
3873                                          * We may be finding a CNAME instead
3874                                          * of the desired type.
3875                                          *
3876                                          * If we've already got the CNAME RRSIG,
3877                                          * use it, otherwise change sigtype
3878                                          * so that we find it.
3879                                          */
3880                                         if (cnamesig != NULL)
3881                                                 foundsig = cnamesig;
3882                                         else
3883                                                 sigtype =
3884                                                     RBTDB_RDATATYPE_SIGCNAME;
3885                                 }
3886                                 /*
3887                                  * If we've got all we need, end the search.
3888                                  */
3889                                 if (!maybe_zonecut && foundsig != NULL)
3890                                         break;
3891                         } else if (header->type == sigtype) {
3892                                 /*
3893                                  * We've found the RRSIG rdataset for our
3894                                  * target type.  Remember it.
3895                                  */
3896                                 foundsig = header;
3897                                 /*
3898                                  * If we've got all we need, end the search.
3899                                  */
3900                                 if (!maybe_zonecut && found != NULL)
3901                                         break;
3902                         } else if (header->type == dns_rdatatype_nsec &&
3903                                    !search.rbtversion->havensec3) {
3904                                 /*
3905                                  * Remember a NSEC rdataset even if we're
3906                                  * not specifically looking for it, because
3907                                  * we might need it later.
3908                                  */
3909                                 nsecheader = header;
3910                         } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3911                                    !search.rbtversion->havensec3) {
3912                                 /*
3913                                  * If we need the NSEC rdataset, we'll also
3914                                  * need its signature.
3915                                  */
3916                                 nsecsig = header;
3917                         } else if (cname_ok &&
3918                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
3919                                 /*
3920                                  * If we get a CNAME match, we'll also need
3921                                  * its signature.
3922                                  */
3923                                 cnamesig = header;
3924                         }
3925                 }
3926         }
3927
3928         if (empty_node) {
3929                 /*
3930                  * We have an exact match for the name, but there are no
3931                  * active rdatasets in the desired version.  That means that
3932                  * this node doesn't exist in the desired version, and that
3933                  * we really have a partial match.
3934                  */
3935                 if (!wild) {
3936                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3937                         goto partial_match;
3938                 }
                     /*
                      * On a wildcard match we fall through instead: 'found'
                      * is still NULL, so the not-found handling below
                      * decides the result.
                      */
3939         }
3940
3941         /*
3942          * If we didn't find what we were looking for...
3943          */
3944         if (found == NULL) {
3945                 if (search.zonecut != NULL) {
3946                         /*
3947                          * We were trying to find glue at a node beneath a
3948                          * zone cut, but didn't.
3949                          *
3950                          * Return the delegation.
3951                          */
3952                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3953                         result = setup_delegation(&search, nodep, foundname,
3954                                                   rdataset, sigrdataset);
3955                         goto tree_exit;
3956                 }
3957                 /*
3958                  * The desired type doesn't exist.
3959                  */
3960                 result = DNS_R_NXRRSET;
3961                 if (search.rbtversion->secure == dns_db_secure &&
3962                     !search.rbtversion->havensec3 &&
3963                     (nsecheader == NULL || nsecsig == NULL)) {
3964                         /*
3965                          * The zone is secure but there's no NSEC,
3966                          * or the NSEC has no signature!
3967                          */
3968                         if (!wild) {
3969                                 result = DNS_R_BADDB;
3970                                 goto node_exit;
3971                         }
3972
3973                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3974                         result = find_closest_nsec(&search, nodep, foundname,
3975                                                    rdataset, sigrdataset,
3976                                                    search.rbtdb->tree,
3977                                                    search.rbtversion->secure);
3978                         if (result == ISC_R_SUCCESS)
3979                                 result = DNS_R_EMPTYWILD;
3980                         goto tree_exit;
3981                 }
3982                 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3983                     nsecheader == NULL)
3984                 {
3985                         /*
3986                          * There's no NSEC record, and we were told
3987                          * to find one.
3988                          */
3989                         result = DNS_R_BADDB;
3990                         goto node_exit;
3991                 }
3992                 if (nodep != NULL) {
3993                         new_reference(search.rbtdb, node);
3994                         *nodep = node;
3995                 }
                     /*
                      * Return the NSEC (and its signature, if any) with the
                      * NXRRSET result.  The two checks above ensure that
                      * nsecheader is non-NULL whenever this branch is taken.
                      */
3996                 if ((search.rbtversion->secure == dns_db_secure &&
3997                      !search.rbtversion->havensec3) ||
3998                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
3999                 {
4000                         bind_rdataset(search.rbtdb, node, nsecheader,
4001                                       0, rdataset);
4002                         if (nsecsig != NULL)
4003                                 bind_rdataset(search.rbtdb, node,
4004                                               nsecsig, 0, sigrdataset);
4005                 }
4006                 if (wild)
4007                         foundname->attributes |= DNS_NAMEATTR_WILDCARD;
4008                 goto node_exit;
4009         }
4010
4011         /*
4012          * We found what we were looking for, or we found a CNAME.
4013          */
4014
4015         if (type != found->type &&
4016             type != dns_rdatatype_any &&
4017             found->type == dns_rdatatype_cname) {
4018                 /*
4019                  * We weren't doing an ANY query and we found a CNAME instead
4020                  * of the type we were looking for, so we need to indicate
4021                  * that result to the caller.
4022                  */
4023                 result = DNS_R_CNAME;
4024         } else if (search.zonecut != NULL) {
4025                 /*
4026                  * If we're beneath a zone cut, we must indicate that the
4027                  * result is glue, unless we're actually at the zone cut
4028                  * and the type is NSEC or KEY.
4029                  */
4030                 if (search.zonecut == node) {
4031                         /*
4032                          * It is not clear if KEY should still be
4033                          * allowed at the parent side of the zone
4034                          * cut or not.  It is needed for RFC3007
4035                          * validated updates.
4036                          */
4037                         if (type == dns_rdatatype_nsec ||
4038                             type == dns_rdatatype_nsec3 ||
4039                             type == dns_rdatatype_key)
4040                                 result = ISC_R_SUCCESS;
4041                         else if (type == dns_rdatatype_any)
4042                                 result = DNS_R_ZONECUT;
4043                         else
4044                                 result = DNS_R_GLUE;
4045                 } else
4046                         result = DNS_R_GLUE;
4047                 /*
4048                  * We might have found data that isn't glue, but was occluded
4049                  * by a dynamic update.  If the caller cares about this, they
4050                  * will have told us to validate glue.
4051                  *
4052                  * XXX We should cache the glue validity state!
4053                  */
4054                 if (result == DNS_R_GLUE &&
4055                     (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
4056                     !valid_glue(&search, foundname, type, node)) {
4057                         NODE_UNLOCK(lock, isc_rwlocktype_read);
4058                         result = setup_delegation(&search, nodep, foundname,
4059                                                   rdataset, sigrdataset);
4060                     goto tree_exit;
4061                 }
4062         } else {
4063                 /*
4064                  * An ordinary successful query!
4065                  */
4066                 result = ISC_R_SUCCESS;
4067         }
4068
             /*
              * If the zone cut recorded during the header scan is this very
              * node, the reference taken at that point is handed to the
              * caller instead of taking another one, and the cleanup after
              * tree_exit is cancelled.
              */
4069         if (nodep != NULL) {
4070                 if (!at_zonecut)
4071                         new_reference(search.rbtdb, node);
4072                 else
4073                         search.need_cleanup = ISC_FALSE;
4074                 *nodep = node;
4075         }
4076
             /*
              * For an ANY query only the node is returned; no rdataset is
              * bound.
              */
4077         if (type != dns_rdatatype_any) {
4078                 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
4079                 if (foundsig != NULL)
4080                         bind_rdataset(search.rbtdb, node, foundsig, 0,
4081                                       sigrdataset);
4082         }
4083
4084         if (wild)
4085                 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
4086
             /*
              * Exit paths: node_exit is for paths that still hold the node
              * lock; tree_exit is for paths that have already released it
              * or never took it.
              */
4087  node_exit:
4088         NODE_UNLOCK(lock, isc_rwlocktype_read);
4089
4090  tree_exit:
4091         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4092
4093         /*
4094          * If we found a zonecut but aren't going to use it, we have to
4095          * let go of it.
4096          */
4097         if (search.need_cleanup) {
4098                 node = search.zonecut;
4099                 INSIST(node != NULL);
4100                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4101
4102                 NODE_LOCK(lock, isc_rwlocktype_read);
4103                 decrement_reference(search.rbtdb, node, 0,
4104                                     isc_rwlocktype_read, isc_rwlocktype_none,
4105                                     ISC_FALSE);
4106                 NODE_UNLOCK(lock, isc_rwlocktype_read);
4107         }
4108
4109         if (close_version)
4110                 closeversion(db, &version, ISC_FALSE);
4111
4112         dns_rbtnodechain_reset(&search.chain);
4113
4114         return (result);
4115 }
4116
/*
 * zone_findzonecut: placeholder that must never be called.
 *
 * Every argument is ignored.  The function reports a fatal error through
 * FATAL_ERROR(); as the NOTREACHED note says, control is not expected to
 * come back from that call, and the ISC_R_NOTIMPLEMENTED return only
 * exists to give the function a return value.
 *
 * NOTE(review): presumably this fills the "find zone cut" entry for zone
 * databases, which only makes sense for caches -- confirm against the
 * method table, which is outside this part of the file.
 */
4117 static isc_result_t
4118 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4119                  isc_stdtime_t now, dns_dbnode_t **nodep,
4120                  dns_name_t *foundname,
4121                  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4122 {
             /* Mark every parameter as deliberately unused. */
4123         UNUSED(db);
4124         UNUSED(name);
4125         UNUSED(options);
4126         UNUSED(now);
4127         UNUSED(nodep);
4128         UNUSED(foundname);
4129         UNUSED(rdataset);
4130         UNUSED(sigrdataset);
4131
4132         FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
4133
4134         /* NOTREACHED */
4135         return (ISC_R_NOTIMPLEMENTED);
4136 }
4137
/*
 * cache_zonecut_callback: per-node callback that looks for a usable DNAME.
 *
 * 'arg' is the rbtdb_search_t of the search in progress; 'name' is unused.
 * search->zonecut must be NULL on entry.
 *
 * The rdataset headers at 'node' are scanned under the node lock.  A
 * header whose TTL is not later than search->now is treated as stale and
 * is either left alone, marked stale, or freed (see the loop); it is never
 * considered as a DNAME candidate.  Among the remaining headers, an
 * existing DNAME and an existing RRSIG DNAME are remembered.
 *
 * Returns DNS_R_PARTIALMATCH when a DNAME was found and either its trust
 * is not pending or DNS_DBFIND_PENDINGOK is set in search->options; in
 * that case 'node' is recorded as search->zonecut with a new reference
 * and search->need_cleanup is set.  Returns DNS_R_CONTINUE otherwise.
 *
 * NOTE(review): presumably handed to dns_rbt_findnode() by the cache
 * lookup routines, in the way zone_find() passes zone_zonecut_callback()
 * -- the call sites are outside this part of the file.
 */
4138 static isc_result_t
4139 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
4140         rbtdb_search_t *search = arg;
4141         rdatasetheader_t *header, *header_prev, *header_next;
4142         rdatasetheader_t *dname_header, *sigdname_header;
4143         isc_result_t result;
4144         nodelock_t *lock;
4145         isc_rwlocktype_t locktype;
4146
4147         /* No zone cut may have been recorded for this search yet. */
4148
4149         REQUIRE(search->zonecut == NULL);
4150
4151         /*
4152          * Keep compiler silent.
4153          */
4154         UNUSED(name);
4155
             /*
              * Start with a read lock.  'locktype' tracks the mode actually
              * held, because the loop below may upgrade it to a write lock.
              */
4156         lock = &(search->rbtdb->node_locks[node->locknum].lock);
4157         locktype = isc_rwlocktype_read;
4158         NODE_LOCK(lock, locktype);
4159
4160         /*
4161          * Look for a DNAME or RRSIG DNAME rdataset.
4162          */
4163         dname_header = NULL;
4164         sigdname_header = NULL;
             /*
              * header_prev trails the most recent header that is still
              * linked into the list, so that a freed header can be
              * unlinked.  header_next is saved up front because 'header'
              * may be freed inside the loop body.
              */
4165         header_prev = NULL;
4166         for (header = node->data; header != NULL; header = header_next) {
4167                 header_next = header->next;
4168                 if (header->rdh_ttl <= search->now) {
4169                         /*
4170                          * This rdataset is stale.  If no one else is
4171                          * using the node, we can clean it up right
4172                          * now, otherwise we mark it as stale, and
4173                          * the node as dirty, so it will get cleaned
4174                          * up later.
4175                          */
4176                         if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
4177                             (locktype == isc_rwlocktype_write ||
4178                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4179                                 /*
4180                                  * We update the node's status only when we
4181                                  * can get write access; otherwise, we leave
4182                                  * others to this work.  Periodical cleaning
4183                                  * will eventually take the job as the last
4184                                  * resort.
4185                                  * We won't downgrade the lock, since other
4186                                  * rdatasets are probably stale, too.
4187                                  */
4188                                 locktype = isc_rwlocktype_write;
4189
4190                                 if (dns_rbtnode_refcurrent(node) == 0) {
4191                                         isc_mem_t *mctx;
4192
4193                                         /*
4194                                          * header->down can be non-NULL if the
4195                                          * refcount has just decremented to 0
4196                                          * but decrement_reference() has not
4197                                          * performed clean_cache_node(), in
4198                                          * which case we need to purge the
4199                                          * stale headers first.
4200                                          */
4201                                         mctx = search->rbtdb->common.mctx;
4202                                         clean_stale_headers(search->rbtdb,
4203                                                             mctx,
4204                                                             header);
                                             /*
                                              * Unlink the header from the
                                              * node's list before freeing
                                              * it; header_prev is left
                                              * unchanged.
                                              */
4205                                         if (header_prev != NULL)
4206                                                 header_prev->next =
4207                                                         header->next;
4208                                         else
4209                                                 node->data = header->next;
4210                                         free_rdataset(search->rbtdb, mctx,
4211                                                       header);
4212                                 } else {
4213                                         header->attributes |=
4214                                                 RDATASET_ATTR_STALE;
4215                                         node->dirty = 1;
4216                                         header_prev = header;
4217                                 }
4218                         } else
4219                                 header_prev = header;
4220                 } else if (header->type == dns_rdatatype_dname &&
4221                            EXISTS(header)) {
4222                         dname_header = header;
4223                         header_prev = header;
4224                 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
4225                          EXISTS(header)) {
4226                         sigdname_header = header;
4227                         header_prev = header;
4228                 } else
4229                         header_prev = header;
4230         }
4231
4232         if (dname_header != NULL &&
4233             (!DNS_TRUST_PENDING(dname_header->trust) ||
4234              (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4235                 /*
4236                  * We increment the reference count on node to ensure that
4237                  * search->zonecut_rdataset will still be valid later.
4238                  */
4239                 new_reference(search->rbtdb, node);
4240                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4241                 search->zonecut = node;
4242                 search->zonecut_rdataset = dname_header;
4243                 search->zonecut_sigrdataset = sigdname_header;
4244                 search->need_cleanup = ISC_TRUE;
4245                 result = DNS_R_PARTIALMATCH;
4246         } else
4247                 result = DNS_R_CONTINUE;
4248
             /* Unlock in whichever mode the loop left the lock in. */
4249         NODE_UNLOCK(lock, locktype);
4250
4251         return (result);
4252 }
4253
4254 static inline isc_result_t
4255 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4256                      dns_dbnode_t **nodep, dns_name_t *foundname,
4257                      dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4258 {
4259         unsigned int i;
4260         dns_rbtnode_t *level_node;
4261         rdatasetheader_t *header, *header_prev, *header_next;
4262         rdatasetheader_t *found, *foundsig;
4263         isc_result_t result = ISC_R_NOTFOUND;
4264         dns_name_t name;
4265         dns_rbtdb_t *rbtdb;
4266         isc_boolean_t done;
4267         nodelock_t *lock;
4268         isc_rwlocktype_t locktype;
4269
4270         /*
4271          * Caller must be holding the tree lock.
4272          */
4273
4274         rbtdb = search->rbtdb;
4275         i = search->chain.level_matches;
4276         done = ISC_FALSE;
4277         do {
4278                 locktype = isc_rwlocktype_read;
4279                 lock = &rbtdb->node_locks[node->locknum].lock;
4280                 NODE_LOCK(lock, locktype);
4281
4282                 /*
4283                  * Look for NS and RRSIG NS rdatasets.
4284                  */
4285                 found = NULL;
4286                 foundsig = NULL;
4287                 header_prev = NULL;
4288                 for (header = node->data;
4289                      header != NULL;
4290                      header = header_next) {
4291                         header_next = header->next;
4292                         if (header->rdh_ttl <= search->now) {
4293                                 /*
4294                                  * This rdataset is stale.  If no one else is
4295                                  * using the node, we can clean it up right
4296                                  * now, otherwise we mark it as stale, and
4297                                  * the node as dirty, so it will get cleaned
4298                                  * up later.
4299                                  */
4300                                 if ((header->rdh_ttl <= search->now -
4301                                                     RBTDB_VIRTUAL) &&
4302                                     (locktype == isc_rwlocktype_write ||
4303                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4304                                         /*
4305                                          * We update the node's status only
4306                                          * when we can get write access.
4307                                          */
4308                                         locktype = isc_rwlocktype_write;
4309
4310                                         if (dns_rbtnode_refcurrent(node)
4311                                             == 0) {
4312                                                 isc_mem_t *m;
4313
4314                                                 m = search->rbtdb->common.mctx;
4315                                                 clean_stale_headers(
4316                                                         search->rbtdb,
4317                                                         m, header);
4318                                                 if (header_prev != NULL)
4319                                                         header_prev->next =
4320                                                                 header->next;
4321                                                 else
4322                                                         node->data =
4323                                                                 header->next;
4324                                                 free_rdataset(rbtdb, m,
4325                                                               header);
4326                                         } else {
4327                                                 header->attributes |=
4328                                                         RDATASET_ATTR_STALE;
4329                                                 node->dirty = 1;
4330                                                 header_prev = header;
4331                                         }
4332                                 } else
4333                                         header_prev = header;
4334                         } else if (EXISTS(header)) {
4335                                 /*
4336                                  * We've found an extant rdataset.  See if
4337                                  * we're interested in it.
4338                                  */
4339                                 if (header->type == dns_rdatatype_ns) {
4340                                         found = header;
4341                                         if (foundsig != NULL)
4342                                                 break;
4343                                 } else if (header->type ==
4344                                            RBTDB_RDATATYPE_SIGNS) {
4345                                         foundsig = header;
4346                                         if (found != NULL)
4347                                                 break;
4348                                 }
4349                                 header_prev = header;
4350                         } else
4351                                 header_prev = header;
4352                 }
4353
4354                 if (found != NULL) {
4355                         /*
4356                          * If we have to set foundname, we do it before
4357                          * anything else.  If we were to set foundname after
4358                          * we had set nodep or bound the rdataset, then we'd
4359                          * have to undo that work if dns_name_concatenate()
4360                          * failed.  By setting foundname first, there's
4361                          * nothing to undo if we have trouble.
4362                          */
4363                         if (foundname != NULL) {
4364                                 dns_name_init(&name, NULL);
4365                                 dns_rbt_namefromnode(node, &name);
4366                                 result = dns_name_copy(&name, foundname, NULL);
4367                                 while (result == ISC_R_SUCCESS && i > 0) {
4368                                         i--;
4369                                         level_node = search->chain.levels[i];
4370                                         dns_name_init(&name, NULL);
4371                                         dns_rbt_namefromnode(level_node,
4372                                                              &name);
4373                                         result =
4374                                                 dns_name_concatenate(foundname,
4375                                                                      &name,
4376                                                                      foundname,
4377                                                                      NULL);
4378                                 }
4379                                 if (result != ISC_R_SUCCESS) {
4380                                         *nodep = NULL;
4381                                         goto node_exit;
4382                                 }
4383                         }
4384                         result = DNS_R_DELEGATION;
4385                         if (nodep != NULL) {
4386                                 new_reference(search->rbtdb, node);
4387                                 *nodep = node;
4388                         }
4389                         bind_rdataset(search->rbtdb, node, found, search->now,
4390                                       rdataset);
4391                         if (foundsig != NULL)
4392                                 bind_rdataset(search->rbtdb, node, foundsig,
4393                                               search->now, sigrdataset);
4394                         if (need_headerupdate(found, search->now) ||
4395                             (foundsig != NULL &&
4396                              need_headerupdate(foundsig, search->now))) {
4397                                 if (locktype != isc_rwlocktype_write) {
4398                                         NODE_UNLOCK(lock, locktype);
4399                                         NODE_LOCK(lock, isc_rwlocktype_write);
4400                                         locktype = isc_rwlocktype_write;
4401                                         POST(locktype);
4402                                 }
4403                                 if (need_headerupdate(found, search->now))
4404                                         update_header(search->rbtdb, found,
4405                                                       search->now);
4406                                 if (foundsig != NULL &&
4407                                     need_headerupdate(foundsig, search->now)) {
4408                                         update_header(search->rbtdb, foundsig,
4409                                                       search->now);
4410                                 }
4411                         }
4412                 }
4413
4414         node_exit:
4415                 NODE_UNLOCK(lock, locktype);
4416
4417                 if (found == NULL && i > 0) {
4418                         i--;
4419                         node = search->chain.levels[i];
4420                 } else
4421                         done = ISC_TRUE;
4422
4423         } while (!done);
4424
4425         return (result);
4426 }
4427
4428 static isc_result_t
4429 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4430                   isc_stdtime_t now, dns_name_t *foundname,
4431                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4432 {
4433         dns_rbtnode_t *node;
4434         rdatasetheader_t *header, *header_next, *header_prev;
4435         rdatasetheader_t *found, *foundsig;
4436         isc_boolean_t empty_node;
4437         isc_result_t result;
4438         dns_fixedname_t fname, forigin;
4439         dns_name_t *name, *origin;
4440         rbtdb_rdatatype_t matchtype, sigmatchtype;
4441         nodelock_t *lock;
4442         isc_rwlocktype_t locktype;
4443
4444         matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4445         sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4446                                              dns_rdatatype_nsec);
4447
4448         do {
4449                 node = NULL;
4450                 dns_fixedname_init(&fname);
4451                 name = dns_fixedname_name(&fname);
4452                 dns_fixedname_init(&forigin);
4453                 origin = dns_fixedname_name(&forigin);
4454                 result = dns_rbtnodechain_current(&search->chain, name,
4455                                                   origin, &node);
4456                 if (result != ISC_R_SUCCESS)
4457                         return (result);
4458                 locktype = isc_rwlocktype_read;
4459                 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4460                 NODE_LOCK(lock, locktype);
4461                 found = NULL;
4462                 foundsig = NULL;
4463                 empty_node = ISC_TRUE;
4464                 header_prev = NULL;
4465                 for (header = node->data;
4466                      header != NULL;
4467                      header = header_next) {
4468                         header_next = header->next;
4469                         if (header->rdh_ttl <= now) {
4470                                 /*
4471                                  * This rdataset is stale.  If no one else is
4472                                  * using the node, we can clean it up right
4473                                  * now, otherwise we mark it as stale, and the
4474                                  * node as dirty, so it will get cleaned up
4475                                  * later.
4476                                  */
4477                                 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4478                                     (locktype == isc_rwlocktype_write ||
4479                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4480                                         /*
4481                                          * We update the node's status only
4482                                          * when we can get write access.
4483                                          */
4484                                         locktype = isc_rwlocktype_write;
4485
4486                                         if (dns_rbtnode_refcurrent(node)
4487                                             == 0) {
4488                                                 isc_mem_t *m;
4489
4490                                                 m = search->rbtdb->common.mctx;
4491                                                 clean_stale_headers(
4492                                                         search->rbtdb,
4493                                                         m, header);
4494                                                 if (header_prev != NULL)
4495                                                         header_prev->next =
4496                                                                 header->next;
4497                                                 else
4498                                                         node->data = header->next;
4499                                                 free_rdataset(search->rbtdb, m,
4500                                                               header);
4501                                         } else {
4502                                                 header->attributes |=
4503                                                         RDATASET_ATTR_STALE;
4504                                                 node->dirty = 1;
4505                                                 header_prev = header;
4506                                         }
4507                                 } else
4508                                         header_prev = header;
4509                                 continue;
4510                         }
4511                         if (NONEXISTENT(header) ||
4512                             RBTDB_RDATATYPE_BASE(header->type) == 0) {
4513                                 header_prev = header;
4514                                 continue;
4515                         }
4516                         empty_node = ISC_FALSE;
4517                         if (header->type == matchtype)
4518                                 found = header;
4519                         else if (header->type == sigmatchtype)
4520                                 foundsig = header;
4521                         header_prev = header;
4522                 }
4523                 if (found != NULL) {
4524                         result = dns_name_concatenate(name, origin,
4525                                                       foundname, NULL);
4526                         if (result != ISC_R_SUCCESS)
4527                                 goto unlock_node;
4528                         bind_rdataset(search->rbtdb, node, found,
4529                                       now, rdataset);
4530                         if (foundsig != NULL)
4531                                 bind_rdataset(search->rbtdb, node, foundsig,
4532                                               now, sigrdataset);
4533                         new_reference(search->rbtdb, node);
4534                         *nodep = node;
4535                         result = DNS_R_COVERINGNSEC;
4536                 } else if (!empty_node) {
4537                         result = ISC_R_NOTFOUND;
4538                 } else
4539                         result = dns_rbtnodechain_prev(&search->chain, NULL,
4540                                                        NULL);
4541  unlock_node:
4542                 NODE_UNLOCK(lock, locktype);
4543         } while (empty_node && result == ISC_R_SUCCESS);
4544         return (result);
4545 }
4546
4547 /*
4548  * Mark a database for response policy rewriting.
4549  */
4550 #ifdef BIND9
4551 static void
4552 get_rpz_enabled(dns_db_t *db, dns_rpz_st_t *st)
4553 {
4554         dns_rbtdb_t *rbtdb;
4555
4556         rbtdb = (dns_rbtdb_t *)db;
4557         REQUIRE(VALID_RBTDB(rbtdb));
4558         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4559         dns_rpz_enabled(rbtdb->rpz_cidr, st);
4560         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4561 }
4562
4563 /*
4564  * Search the CDIR block tree of a response policy tree of trees for all of
4565  * the IP addresses in an A or AAAA rdataset.
4566  * Among the policies for all IPv4 and IPv6 addresses for a name, choose
4567  *      the earliest configured policy,
4568  *      QNAME over IP over NSDNAME over NSIP,
4569  *      the longest prefix,
4570  *      the lexically smallest address.
4571  * The caller must have already checked that any existing policy was not
4572  * configured earlier than this policy zone and does not have a higher
4573  * precedence type.
4574  */
4575 static isc_result_t
4576 rpz_findips(dns_rpz_zone_t *rpz, dns_rpz_type_t rpz_type,
4577             dns_zone_t *zone, dns_db_t *db, dns_dbversion_t *version,
4578             dns_rdataset_t *ardataset, dns_rpz_st_t *st,
4579             dns_name_t *query_qname)
4580 {
4581         dns_rbtdb_t *rbtdb;
4582         struct in_addr ina;
4583         struct in6_addr in6a;
4584         isc_netaddr_t netaddr;
4585         dns_fixedname_t selfnamef, qnamef;
4586         dns_name_t *selfname, *qname;
4587         dns_rbtnode_t *node;
4588         dns_rdataset_t zrdataset;
4589         dns_rpz_cidr_bits_t prefix;
4590         isc_result_t result;
4591         dns_rpz_policy_t rpz_policy;
4592         dns_ttl_t ttl;
4593
4594         rbtdb = (dns_rbtdb_t *)db;
4595         REQUIRE(VALID_RBTDB(rbtdb));
4596         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4597
4598         if (rbtdb->rpz_cidr == NULL) {
4599                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4600                 return (ISC_R_UNEXPECTED);
4601         }
4602
4603         dns_fixedname_init(&selfnamef);
4604         dns_fixedname_init(&qnamef);
4605         selfname = dns_fixedname_name(&selfnamef);
4606         qname = dns_fixedname_name(&qnamef);
4607
4608         for (result = dns_rdataset_first(ardataset);
4609              result == ISC_R_SUCCESS;
4610              result = dns_rdataset_next(ardataset)) {
4611                 dns_rdata_t rdata = DNS_RDATA_INIT;
4612                 dns_rdataset_current(ardataset, &rdata);
4613                 switch (rdata.type) {
4614                 case dns_rdatatype_a:
4615                         INSIST(rdata.length == 4);
4616                         memcpy(&ina.s_addr, rdata.data, 4);
4617                         isc_netaddr_fromin(&netaddr, &ina);
4618                         break;
4619                 case dns_rdatatype_aaaa:
4620                         INSIST(rdata.length == 16);
4621                         memcpy(in6a.s6_addr, rdata.data, 16);
4622                         isc_netaddr_fromin6(&netaddr, &in6a);
4623                         break;
4624                 default:
4625                         continue;
4626                 }
4627
4628                 result = dns_rpz_cidr_find(rbtdb->rpz_cidr, &netaddr, rpz_type,
4629                                            selfname, qname, &prefix);
4630                 if (result != ISC_R_SUCCESS)
4631                         continue;
4632
4633                 /*
4634                  * If we already have a rule, discard this new rule if
4635                  * is not better.
4636                  * The caller has checked that st->m.rpz->num > rpz->num
4637                  * or st->m.rpz->num == rpz->num and st->m.type >= rpz_type
4638                  */
4639                 if (st->m.policy != DNS_RPZ_POLICY_MISS &&
4640                     st->m.rpz->num == rpz->num &&
4641                     (st->m.type < rpz_type ||
4642                      (st->m.type == rpz_type &&
4643                       (st->m.prefix > prefix ||
4644                        (st->m.prefix == prefix &&
4645                         0 > dns_name_rdatacompare(st->qname, qname))))))
4646                         continue;
4647
4648                 /*
4649                  * We have rpz_st an entry with a prefix at least as long as
4650                  * the prefix of the entry we had before.  Find the node
4651                  * corresponding to CDIR tree entry.
4652                  */
4653                 node = NULL;
4654                 result = dns_rbt_findnode(rbtdb->tree, qname, NULL,
4655                                           &node, NULL, 0, NULL, NULL);
4656                 if (result != ISC_R_SUCCESS) {
4657                         char namebuf[DNS_NAME_FORMATSIZE];
4658
4659                         dns_name_format(qname, namebuf, sizeof(namebuf));
4660                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_RPZ,
4661                                       DNS_LOGMODULE_RBTDB, DNS_RPZ_ERROR_LEVEL,
4662                                       "rpz_findips findnode(%s): %s",
4663                                       namebuf, isc_result_totext(result));
4664                         continue;
4665                 }
4666                 /*
4667                  * First look for a simple rewrite of the IP address.
4668                  * If that fails, look for a CNAME.  If we cannot find
4669                  * a CNAME or the CNAME is neither of the special forms
4670                  * "*" or ".", treat it like a real CNAME.
4671                  */
4672                 dns_rdataset_init(&zrdataset);
4673                 result = dns_db_findrdataset(db, node, version, ardataset->type,
4674                                              0, 0, &zrdataset, NULL);
4675                 if (result != ISC_R_SUCCESS)
4676                         result = dns_db_findrdataset(db, node, version,
4677                                                      dns_rdatatype_cname,
4678                                                      0, 0, &zrdataset, NULL);
4679                 if (result == ISC_R_SUCCESS) {
4680                         if (zrdataset.type != dns_rdatatype_cname) {
4681                                 rpz_policy = DNS_RPZ_POLICY_RECORD;
4682                         } else {
4683                                 rpz_policy = dns_rpz_decode_cname(&zrdataset,
4684                                                                   selfname);
4685                                 if (rpz_policy == DNS_RPZ_POLICY_RECORD ||
4686                                     rpz_policy == DNS_RPZ_POLICY_WILDCNAME)
4687                                         result = DNS_R_CNAME;
4688                         }
4689                         ttl = zrdataset.ttl;
4690                 } else {
4691                         rpz_policy = DNS_RPZ_POLICY_RECORD;
4692                         result = DNS_R_NXRRSET;
4693                         ttl = DNS_RPZ_TTL_DEFAULT;
4694                 }
4695
4696                 /*
4697                  * Use an overriding action specified in the configuration file
4698                  */
4699                 if (rpz->policy != DNS_RPZ_POLICY_GIVEN) {
4700                         /*
4701                          * only log DNS_RPZ_POLICY_DISABLED hits
4702                          */
4703                         if (rpz->policy == DNS_RPZ_POLICY_DISABLED) {
4704                                 if (isc_log_wouldlog(dns_lctx,
4705                                                      DNS_RPZ_INFO_LEVEL)) {
4706                                         char qname_buf[DNS_NAME_FORMATSIZE];
4707                                         char rpz_qname_buf[DNS_NAME_FORMATSIZE];
4708                                         dns_name_format(query_qname, qname_buf,
4709                                                         sizeof(qname_buf));
4710                                         dns_name_format(qname, rpz_qname_buf,
4711                                                         sizeof(rpz_qname_buf));
4712
4713                                         isc_log_write(dns_lctx,
4714                                                 DNS_LOGCATEGORY_RPZ,
4715                                                 DNS_LOGMODULE_RBTDB,
4716                                                 DNS_RPZ_INFO_LEVEL,
4717                                                 "disabled rpz %s %s rewrite"
4718                                                 " %s via %s",
4719                                                 dns_rpz_type2str(rpz_type),
4720                                                 dns_rpz_policy2str(rpz_policy),
4721                                                 qname_buf, rpz_qname_buf);
4722                                 }
4723                                 continue;
4724                         }
4725
4726                         rpz_policy = rpz->policy;
4727                 }
4728
4729                 if (dns_rdataset_isassociated(st->m.rdataset))
4730                         dns_rdataset_disassociate(st->m.rdataset);
4731                 if (st->m.node != NULL)
4732                         dns_db_detachnode(st->m.db, &st->m.node);
4733                 if (st->m.db != NULL)
4734                         dns_db_detach(&st->m.db);
4735                 if (st->m.zone != NULL)
4736                         dns_zone_detach(&st->m.zone);
4737                 st->m.rpz = rpz;
4738                 st->m.type = rpz_type;
4739                 st->m.prefix = prefix;
4740                 st->m.policy = rpz_policy;
4741                 st->m.ttl = ttl;
4742                 st->m.result = result;
4743                 dns_name_copy(qname, st->qname, NULL);
4744                 if ((rpz_policy == DNS_RPZ_POLICY_RECORD ||
4745                     rpz_policy == DNS_RPZ_POLICY_WILDCNAME) &&
4746                     result != DNS_R_NXRRSET) {
4747                         dns_rdataset_clone(&zrdataset,st->m.rdataset);
4748                         dns_db_attachnode(db, node, &st->m.node);
4749                 }
4750                 dns_db_attach(db, &st->m.db);
4751                 st->m.version = version;
4752                 dns_zone_attach(zone, &st->m.zone);
4753                 if (dns_rdataset_isassociated(&zrdataset))
4754                         dns_rdataset_disassociate(&zrdataset);
4755         }
4756
4757         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4758         return (ISC_R_SUCCESS);
4759 }
4760 #endif
4761
4762 static isc_result_t
4763 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4764            dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4765            dns_dbnode_t **nodep, dns_name_t *foundname,
4766            dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4767 {
4768         dns_rbtnode_t *node = NULL;
4769         isc_result_t result;
4770         rbtdb_search_t search;
4771         isc_boolean_t cname_ok = ISC_TRUE;
4772         isc_boolean_t empty_node;
4773         nodelock_t *lock;
4774         isc_rwlocktype_t locktype;
4775         rdatasetheader_t *header, *header_prev, *header_next;
4776         rdatasetheader_t *found, *nsheader;
4777         rdatasetheader_t *foundsig, *nssig, *cnamesig;
4778         rdatasetheader_t *update, *updatesig;
4779         rbtdb_rdatatype_t sigtype, negtype;
4780
4781         UNUSED(version);
4782
4783         search.rbtdb = (dns_rbtdb_t *)db;
4784
4785         REQUIRE(VALID_RBTDB(search.rbtdb));
4786         REQUIRE(version == NULL);
4787
4788         if (now == 0)
4789                 isc_stdtime_get(&now);
4790
4791         search.rbtversion = NULL;
4792         search.serial = 1;
4793         search.options = options;
4794         search.copy_name = ISC_FALSE;
4795         search.need_cleanup = ISC_FALSE;
4796         search.wild = ISC_FALSE;
4797         search.zonecut = NULL;
4798         dns_fixedname_init(&search.zonecut_name);
4799         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4800         search.now = now;
4801         update = NULL;
4802         updatesig = NULL;
4803
4804         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4805
4806         /*
4807          * Search down from the root of the tree.  If, while going down, we
4808          * encounter a callback node, cache_zonecut_callback() will search the
4809          * rdatasets at the zone cut for a DNAME rdataset.
4810          */
4811         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4812                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
4813                                   cache_zonecut_callback, &search);
4814
4815         if (result == DNS_R_PARTIALMATCH) {
4816                 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4817                         result = find_coveringnsec(&search, nodep, now,
4818                                                    foundname, rdataset,
4819                                                    sigrdataset);
4820                         if (result == DNS_R_COVERINGNSEC)
4821                                 goto tree_exit;
4822                 }
4823                 if (search.zonecut != NULL) {
4824                     result = setup_delegation(&search, nodep, foundname,
4825                                               rdataset, sigrdataset);
4826                     goto tree_exit;
4827                 } else {
4828                 find_ns:
4829                         result = find_deepest_zonecut(&search, node, nodep,
4830                                                       foundname, rdataset,
4831                                                       sigrdataset);
4832                         goto tree_exit;
4833                 }
4834         } else if (result != ISC_R_SUCCESS)
4835                 goto tree_exit;
4836
4837         /*
4838          * Certain DNSSEC types are not subject to CNAME matching
4839          * (RFC4035, section 2.5 and RFC3007).
4840          *
4841          * We don't check for RRSIG, because we don't store RRSIG records
4842          * directly.
4843          */
4844         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4845                 cname_ok = ISC_FALSE;
4846
4847         /*
4848          * We now go looking for rdata...
4849          */
4850
4851         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4852         locktype = isc_rwlocktype_read;
4853         NODE_LOCK(lock, locktype);
4854
4855         found = NULL;
4856         foundsig = NULL;
4857         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4858         negtype = RBTDB_RDATATYPE_VALUE(0, type);
4859         nsheader = NULL;
4860         nssig = NULL;
4861         cnamesig = NULL;
4862         empty_node = ISC_TRUE;
4863         header_prev = NULL;
4864         for (header = node->data; header != NULL; header = header_next) {
4865                 header_next = header->next;
4866                 if (header->rdh_ttl <= now) {
4867                         /*
4868                          * This rdataset is stale.  If no one else is using the
4869                          * node, we can clean it up right now, otherwise we
4870                          * mark it as stale, and the node as dirty, so it will
4871                          * get cleaned up later.
4872                          */
4873                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4874                             (locktype == isc_rwlocktype_write ||
4875                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4876                                 /*
4877                                  * We update the node's status only when we
4878                                  * can get write access.
4879                                  */
4880                                 locktype = isc_rwlocktype_write;
4881
4882                                 if (dns_rbtnode_refcurrent(node) == 0) {
4883                                         isc_mem_t *mctx;
4884
4885                                         mctx = search.rbtdb->common.mctx;
4886                                         clean_stale_headers(search.rbtdb, mctx,
4887                                                             header);
4888                                         if (header_prev != NULL)
4889                                                 header_prev->next =
4890                                                         header->next;
4891                                         else
4892                                                 node->data = header->next;
4893                                         free_rdataset(search.rbtdb, mctx,
4894                                                       header);
4895                                 } else {
4896                                         header->attributes |=
4897                                                 RDATASET_ATTR_STALE;
4898                                         node->dirty = 1;
4899                                         header_prev = header;
4900                                 }
4901                         } else
4902                                 header_prev = header;
4903                 } else if (EXISTS(header)) {
4904                         /*
4905                          * We now know that there is at least one active
4906                          * non-stale rdataset at this node.
4907                          */
4908                         empty_node = ISC_FALSE;
4909
4910                         /*
4911                          * If we found a type we were looking for, remember
4912                          * it.
4913                          */
4914                         if (header->type == type ||
4915                             (type == dns_rdatatype_any &&
4916                              RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4917                             (cname_ok && header->type ==
4918                              dns_rdatatype_cname)) {
4919                                 /*
4920                                  * We've found the answer.
4921                                  */
4922                                 found = header;
4923                                 if (header->type == dns_rdatatype_cname &&
4924                                     cname_ok &&
4925                                     cnamesig != NULL) {
4926                                         /*
4927                                          * If we've already got the
4928                                          * CNAME RRSIG, use it.
4929                                          */
4930                                         foundsig = cnamesig;
4931                                 }
4932                         } else if (header->type == sigtype) {
4933                                 /*
4934                                  * We've found the RRSIG rdataset for our
4935                                  * target type.  Remember it.
4936                                  */
4937                                 foundsig = header;
4938                         } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4939                                    header->type == negtype) {
4940                                 /*
4941                                  * We've found a negative cache entry.
4942                                  */
4943                                 found = header;
4944                         } else if (header->type == dns_rdatatype_ns) {
4945                                 /*
4946                                  * Remember a NS rdataset even if we're
4947                                  * not specifically looking for it, because
4948                                  * we might need it later.
4949                                  */
4950                                 nsheader = header;
4951                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4952                                 /*
4953                                  * If we need the NS rdataset, we'll also
4954                                  * need its signature.
4955                                  */
4956                                 nssig = header;
4957                         } else if (cname_ok &&
4958                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
4959                                 /*
4960                                  * If we get a CNAME match, we'll also need
4961                                  * its signature.
4962                                  */
4963                                 cnamesig = header;
4964                         }
4965                         header_prev = header;
4966                 } else
4967                         header_prev = header;
4968         }
4969
4970         if (empty_node) {
4971                 /*
4972                  * We have an exact match for the name, but there are no
4973                  * extant rdatasets.  That means that this node doesn't
4974                  * meaningfully exist, and that we really have a partial match.
4975                  */
4976                 NODE_UNLOCK(lock, locktype);
4977                 goto find_ns;
4978         }
4979
4980         /*
4981          * If we didn't find what we were looking for...
4982          */
4983         if (found == NULL ||
4984             (DNS_TRUST_ADDITIONAL(found->trust) &&
4985              ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4986             (found->trust == dns_trust_glue &&
4987              ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4988             (DNS_TRUST_PENDING(found->trust) &&
4989              ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4990                 /*
4991                  * If there is an NS rdataset at this node, then this is the
4992                  * deepest zone cut.
4993                  */
4994                 if (nsheader != NULL) {
4995                         if (nodep != NULL) {
4996                                 new_reference(search.rbtdb, node);
4997                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4998                                 *nodep = node;
4999                         }
5000                         bind_rdataset(search.rbtdb, node, nsheader, search.now,
5001                                       rdataset);
5002                         if (need_headerupdate(nsheader, search.now))
5003                                 update = nsheader;
5004                         if (nssig != NULL) {
5005                                 bind_rdataset(search.rbtdb, node, nssig,
5006                                               search.now, sigrdataset);
5007                                 if (need_headerupdate(nssig, search.now))
5008                                         updatesig = nssig;
5009                         }
5010                         result = DNS_R_DELEGATION;
5011                         goto node_exit;
5012                 }
5013
5014                 /*
5015                  * Go find the deepest zone cut.
5016                  */
5017                 NODE_UNLOCK(lock, locktype);
5018                 goto find_ns;
5019         }
5020
5021         /*
5022          * We found what we were looking for, or we found a CNAME.
5023          */
5024
5025         if (nodep != NULL) {
5026                 new_reference(search.rbtdb, node);
5027                 INSIST(!ISC_LINK_LINKED(node, deadlink));
5028                 *nodep = node;
5029         }
5030
5031         if (NEGATIVE(found)) {
5032                 /*
5033                  * We found a negative cache entry.
5034                  */
5035                 if (NXDOMAIN(found))
5036                         result = DNS_R_NCACHENXDOMAIN;
5037                 else
5038                         result = DNS_R_NCACHENXRRSET;
5039         } else if (type != found->type &&
5040                    type != dns_rdatatype_any &&
5041                    found->type == dns_rdatatype_cname) {
5042                 /*
5043                  * We weren't doing an ANY query and we found a CNAME instead
5044                  * of the type we were looking for, so we need to indicate
5045                  * that result to the caller.
5046                  */
5047                 result = DNS_R_CNAME;
5048         } else {
5049                 /*
5050                  * An ordinary successful query!
5051                  */
5052                 result = ISC_R_SUCCESS;
5053         }
5054
5055         if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
5056             result == DNS_R_NCACHENXRRSET) {
5057                 bind_rdataset(search.rbtdb, node, found, search.now,
5058                               rdataset);
5059                 if (need_headerupdate(found, search.now))
5060                         update = found;
5061                 if (!NEGATIVE(found) && foundsig != NULL) {
5062                         bind_rdataset(search.rbtdb, node, foundsig, search.now,
5063                                       sigrdataset);
5064                         if (need_headerupdate(foundsig, search.now))
5065                                 updatesig = foundsig;
5066                 }
5067         }
5068
5069  node_exit:
5070         if ((update != NULL || updatesig != NULL) &&
5071             locktype != isc_rwlocktype_write) {
5072                 NODE_UNLOCK(lock, locktype);
5073                 NODE_LOCK(lock, isc_rwlocktype_write);
5074                 locktype = isc_rwlocktype_write;
5075                 POST(locktype);
5076         }
5077         if (update != NULL && need_headerupdate(update, search.now))
5078                 update_header(search.rbtdb, update, search.now);
5079         if (updatesig != NULL && need_headerupdate(updatesig, search.now))
5080                 update_header(search.rbtdb, updatesig, search.now);
5081
5082         NODE_UNLOCK(lock, locktype);
5083
5084  tree_exit:
5085         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5086
5087         /*
5088          * If we found a zonecut but aren't going to use it, we have to
5089          * let go of it.
5090          */
5091         if (search.need_cleanup) {
5092                 node = search.zonecut;
5093                 INSIST(node != NULL);
5094                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
5095
5096                 NODE_LOCK(lock, isc_rwlocktype_read);
5097                 decrement_reference(search.rbtdb, node, 0,
5098                                     isc_rwlocktype_read, isc_rwlocktype_none,
5099                                     ISC_FALSE);
5100                 NODE_UNLOCK(lock, isc_rwlocktype_read);
5101         }
5102
5103         dns_rbtnodechain_reset(&search.chain);
5104
5105         return (result);
5106 }
5107
5108 static isc_result_t
5109 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
5110                   isc_stdtime_t now, dns_dbnode_t **nodep,
5111                   dns_name_t *foundname,
5112                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
5113 {
5114         dns_rbtnode_t *node = NULL;
5115         nodelock_t *lock;
5116         isc_result_t result;
5117         rbtdb_search_t search;
5118         rdatasetheader_t *header, *header_prev, *header_next;
5119         rdatasetheader_t *found, *foundsig;
5120         unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
5121         isc_rwlocktype_t locktype;
5122
5123         search.rbtdb = (dns_rbtdb_t *)db;
5124
5125         REQUIRE(VALID_RBTDB(search.rbtdb));
5126
5127         if (now == 0)
5128                 isc_stdtime_get(&now);
5129
5130         search.rbtversion = NULL;
5131         search.serial = 1;
5132         search.options = options;
5133         search.copy_name = ISC_FALSE;
5134         search.need_cleanup = ISC_FALSE;
5135         search.wild = ISC_FALSE;
5136         search.zonecut = NULL;
5137         dns_fixedname_init(&search.zonecut_name);
5138         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
5139         search.now = now;
5140
5141         if ((options & DNS_DBFIND_NOEXACT) != 0)
5142                 rbtoptions |= DNS_RBTFIND_NOEXACT;
5143
5144         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5145
5146         /*
5147          * Search down from the root of the tree.
5148          */
5149         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
5150                                   &search.chain, rbtoptions, NULL, &search);
5151
5152         if (result == DNS_R_PARTIALMATCH) {
5153         find_ns:
5154                 result = find_deepest_zonecut(&search, node, nodep, foundname,
5155                                               rdataset, sigrdataset);
5156                 goto tree_exit;
5157         } else if (result != ISC_R_SUCCESS)
5158                 goto tree_exit;
5159
5160         /*
5161          * We now go looking for an NS rdataset at the node.
5162          */
5163
5164         lock = &(search.rbtdb->node_locks[node->locknum].lock);
5165         locktype = isc_rwlocktype_read;
5166         NODE_LOCK(lock, locktype);
5167
5168         found = NULL;
5169         foundsig = NULL;
5170         header_prev = NULL;
5171         for (header = node->data; header != NULL; header = header_next) {
5172                 header_next = header->next;
5173                 if (header->rdh_ttl <= now) {
5174                         /*
5175                          * This rdataset is stale.  If no one else is using the
5176                          * node, we can clean it up right now, otherwise we
5177                          * mark it as stale, and the node as dirty, so it will
5178                          * get cleaned up later.
5179                          */
5180                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5181                             (locktype == isc_rwlocktype_write ||
5182                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5183                                 /*
5184                                  * We update the node's status only when we
5185                                  * can get write access.
5186                                  */
5187                                 locktype = isc_rwlocktype_write;
5188
5189                                 if (dns_rbtnode_refcurrent(node) == 0) {
5190                                         isc_mem_t *mctx;
5191
5192                                         mctx = search.rbtdb->common.mctx;
5193                                         clean_stale_headers(search.rbtdb, mctx,
5194                                                             header);
5195                                         if (header_prev != NULL)
5196                                                 header_prev->next =
5197                                                         header->next;
5198                                         else
5199                                                 node->data = header->next;
5200                                         free_rdataset(search.rbtdb, mctx,
5201                                                       header);
5202                                 } else {
5203                                         header->attributes |=
5204                                                 RDATASET_ATTR_STALE;
5205                                         node->dirty = 1;
5206                                         header_prev = header;
5207                                 }
5208                         } else
5209                                 header_prev = header;
5210                 } else if (EXISTS(header)) {
5211                         /*
5212                          * If we found a type we were looking for, remember
5213                          * it.
5214                          */
5215                         if (header->type == dns_rdatatype_ns) {
5216                                 /*
5217                                  * Remember a NS rdataset even if we're
5218                                  * not specifically looking for it, because
5219                                  * we might need it later.
5220                                  */
5221                                 found = header;
5222                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
5223                                 /*
5224                                  * If we need the NS rdataset, we'll also
5225                                  * need its signature.
5226                                  */
5227                                 foundsig = header;
5228                         }
5229                         header_prev = header;
5230                 } else
5231                         header_prev = header;
5232         }
5233
5234         if (found == NULL) {
5235                 /*
5236                  * No NS records here.
5237                  */
5238                 NODE_UNLOCK(lock, locktype);
5239                 goto find_ns;
5240         }
5241
5242         if (nodep != NULL) {
5243                 new_reference(search.rbtdb, node);
5244                 INSIST(!ISC_LINK_LINKED(node, deadlink));
5245                 *nodep = node;
5246         }
5247
5248         bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
5249         if (foundsig != NULL)
5250                 bind_rdataset(search.rbtdb, node, foundsig, search.now,
5251                               sigrdataset);
5252
5253         if (need_headerupdate(found, search.now) ||
5254             (foundsig != NULL &&  need_headerupdate(foundsig, search.now))) {
5255                 if (locktype != isc_rwlocktype_write) {
5256                         NODE_UNLOCK(lock, locktype);
5257                         NODE_LOCK(lock, isc_rwlocktype_write);
5258                         locktype = isc_rwlocktype_write;
5259                         POST(locktype);
5260                 }
5261                 if (need_headerupdate(found, search.now))
5262                         update_header(search.rbtdb, found, search.now);
5263                 if (foundsig != NULL &&
5264                     need_headerupdate(foundsig, search.now)) {
5265                         update_header(search.rbtdb, foundsig, search.now);
5266                 }
5267         }
5268
5269         NODE_UNLOCK(lock, locktype);
5270
5271  tree_exit:
5272         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5273
5274         INSIST(!search.need_cleanup);
5275
5276         dns_rbtnodechain_reset(&search.chain);
5277
5278         if (result == DNS_R_DELEGATION)
5279                 result = ISC_R_SUCCESS;
5280
5281         return (result);
5282 }
5283
5284 static void
5285 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
5286         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5287         dns_rbtnode_t *node = (dns_rbtnode_t *)source;
5288         unsigned int refs;
5289
5290         REQUIRE(VALID_RBTDB(rbtdb));
5291         REQUIRE(targetp != NULL && *targetp == NULL);
5292
5293         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
5294         dns_rbtnode_refincrement(node, &refs);
5295         INSIST(refs != 0);
5296         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
5297
5298         *targetp = source;
5299 }
5300
5301 static void
5302 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
5303         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5304         dns_rbtnode_t *node;
5305         isc_boolean_t want_free = ISC_FALSE;
5306         isc_boolean_t inactive = ISC_FALSE;
5307         rbtdb_nodelock_t *nodelock;
5308
5309         REQUIRE(VALID_RBTDB(rbtdb));
5310         REQUIRE(targetp != NULL && *targetp != NULL);
5311
5312         node = (dns_rbtnode_t *)(*targetp);
5313         nodelock = &rbtdb->node_locks[node->locknum];
5314
5315         NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
5316
5317         if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
5318                                 isc_rwlocktype_none, ISC_FALSE)) {
5319                 if (isc_refcount_current(&nodelock->references) == 0 &&
5320                     nodelock->exiting) {
5321                         inactive = ISC_TRUE;
5322                 }
5323         }
5324
5325         NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
5326
5327         *targetp = NULL;
5328
5329         if (inactive) {
5330                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5331                 rbtdb->active--;
5332                 if (rbtdb->active == 0)
5333                         want_free = ISC_TRUE;
5334                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
5335                 if (want_free) {
5336                         char buf[DNS_NAME_FORMATSIZE];
5337                         if (dns_name_dynamic(&rbtdb->common.origin))
5338                                 dns_name_format(&rbtdb->common.origin, buf,
5339                                                 sizeof(buf));
5340                         else
5341                                 strcpy(buf, "<UNKNOWN>");
5342                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
5343                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
5344                                       "calling free_rbtdb(%s)", buf);
5345                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
5346                 }
5347         }
5348 }
5349
5350 static isc_result_t
5351 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
5352         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5353         dns_rbtnode_t *rbtnode = node;
5354         rdatasetheader_t *header;
5355         isc_boolean_t force_expire = ISC_FALSE;
5356         /*
5357          * These are the category and module used by the cache cleaner.
5358          */
5359         isc_boolean_t log = ISC_FALSE;
5360         isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
5361         isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
5362         int level = ISC_LOG_DEBUG(2);
5363         char printname[DNS_NAME_FORMATSIZE];
5364
5365         REQUIRE(VALID_RBTDB(rbtdb));
5366
5367         /*
5368          * Caller must hold a tree lock.
5369          */
5370
5371         if (now == 0)
5372                 isc_stdtime_get(&now);
5373
5374         if (isc_mem_isovermem(rbtdb->common.mctx)) {
5375                 isc_uint32_t val;
5376
5377                 isc_random_get(&val);
5378                 /*
5379                  * XXXDCL Could stand to have a better policy, like LRU.
5380                  */
5381                 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
5382
5383                 /*
5384                  * Note that 'log' can be true IFF overmem is also true.
5385                  * overmem can currently only be true for cache
5386                  * databases -- hence all of the "overmem cache" log strings.
5387                  */
5388                 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
5389                 if (log)
5390                         isc_log_write(dns_lctx, category, module, level,
5391                                       "overmem cache: %s %s",
5392                                       force_expire ? "FORCE" : "check",
5393                                       dns_rbt_formatnodename(rbtnode,
5394                                                            printname,
5395                                                            sizeof(printname)));
5396         }
5397
5398         /*
5399          * We may not need write access, but this code path is not performance
5400          * sensitive, so it should be okay to always lock as a writer.
5401          */
5402         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5403                   isc_rwlocktype_write);
5404
5405         for (header = rbtnode->data; header != NULL; header = header->next)
5406                 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
5407                         /*
5408                          * We don't check if refcurrent(rbtnode) == 0 and try
5409                          * to free like we do in cache_find(), because
5410                          * refcurrent(rbtnode) must be non-zero.  This is so
5411                          * because 'node' is an argument to the function.
5412                          */
5413                         header->attributes |= RDATASET_ATTR_STALE;
5414                         rbtnode->dirty = 1;
5415                         if (log)
5416                                 isc_log_write(dns_lctx, category, module,
5417                                               level, "overmem cache: stale %s",
5418                                               printname);
5419                 } else if (force_expire) {
5420                         if (! RETAIN(header)) {
5421                                 set_ttl(rbtdb, header, 0);
5422                                 header->attributes |= RDATASET_ATTR_STALE;
5423                                 rbtnode->dirty = 1;
5424                         } else if (log) {
5425                                 isc_log_write(dns_lctx, category, module,
5426                                               level, "overmem cache: "
5427                                               "reprieve by RETAIN() %s",
5428                                               printname);
5429                         }
5430                 } else if (isc_mem_isovermem(rbtdb->common.mctx) && log)
5431                         isc_log_write(dns_lctx, category, module, level,
5432                                       "overmem cache: saved %s", printname);
5433
5434         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5435                     isc_rwlocktype_write);
5436
5437         return (ISC_R_SUCCESS);
5438 }
5439
5440 static void
5441 overmem(dns_db_t *db, isc_boolean_t overmem) {
5442         /* This is an empty callback.  See adb.c:water() */
5443
5444         UNUSED(db);
5445         UNUSED(overmem);
5446
5447         return;
5448 }
5449
5450 static void
5451 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5452         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5453         dns_rbtnode_t *rbtnode = node;
5454         isc_boolean_t first;
5455
5456         REQUIRE(VALID_RBTDB(rbtdb));
5457
5458         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5459                   isc_rwlocktype_read);
5460
5461         fprintf(out, "node %p, %u references, locknum = %u\n",
5462                 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5463                 rbtnode->locknum);
5464         if (rbtnode->data != NULL) {
5465                 rdatasetheader_t *current, *top_next;
5466
5467                 for (current = rbtnode->data; current != NULL;
5468                      current = top_next) {
5469                         top_next = current->next;
5470                         first = ISC_TRUE;
5471                         fprintf(out, "\ttype %u", current->type);
5472                         do {
5473                                 if (!first)
5474                                         fprintf(out, "\t");
5475                                 first = ISC_FALSE;
5476                                 fprintf(out,
5477                                         "\tserial = %lu, ttl = %u, "
5478                                         "trust = %u, attributes = %u, "
5479                                         "resign = %u\n",
5480                                         (unsigned long)current->serial,
5481                                         current->rdh_ttl,
5482                                         current->trust,
5483                                         current->attributes,
5484                                         current->resign);
5485                                 current = current->down;
5486                         } while (current != NULL);
5487                 }
5488         } else
5489                 fprintf(out, "(empty)\n");
5490
5491         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5492                     isc_rwlocktype_read);
5493 }
5494
5495 static isc_result_t
5496 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5497 {
5498         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5499         rbtdb_dbiterator_t *rbtdbiter;
5500
5501         REQUIRE(VALID_RBTDB(rbtdb));
5502
5503         rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5504         if (rbtdbiter == NULL)
5505                 return (ISC_R_NOMEMORY);
5506
5507         rbtdbiter->common.methods = &dbiterator_methods;
5508         rbtdbiter->common.db = NULL;
5509         dns_db_attach(db, &rbtdbiter->common.db);
5510         rbtdbiter->common.relative_names =
5511                         ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5512         rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5513         rbtdbiter->common.cleaning = ISC_FALSE;
5514         rbtdbiter->paused = ISC_TRUE;
5515         rbtdbiter->tree_locked = isc_rwlocktype_none;
5516         rbtdbiter->result = ISC_R_SUCCESS;
5517         dns_fixedname_init(&rbtdbiter->name);
5518         dns_fixedname_init(&rbtdbiter->origin);
5519         rbtdbiter->node = NULL;
5520         rbtdbiter->delete = 0;
5521         rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5522         rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5523         memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5524         dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5525         dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5526         if (rbtdbiter->nsec3only)
5527                 rbtdbiter->current = &rbtdbiter->nsec3chain;
5528         else
5529                 rbtdbiter->current = &rbtdbiter->chain;
5530
5531         *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5532
5533         return (ISC_R_SUCCESS);
5534 }
5535
5536 static isc_result_t
5537 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5538                   dns_rdatatype_t type, dns_rdatatype_t covers,
5539                   isc_stdtime_t now, dns_rdataset_t *rdataset,
5540                   dns_rdataset_t *sigrdataset)
5541 {
5542         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5543         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5544         rdatasetheader_t *header, *header_next, *found, *foundsig;
5545         rbtdb_serial_t serial;
5546         rbtdb_version_t *rbtversion = version;
5547         isc_boolean_t close_version = ISC_FALSE;
5548         rbtdb_rdatatype_t matchtype, sigmatchtype;
5549
5550         REQUIRE(VALID_RBTDB(rbtdb));
5551         REQUIRE(type != dns_rdatatype_any);
5552         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
5553
5554         if (rbtversion == NULL) {
5555                 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5556                 close_version = ISC_TRUE;
5557         }
5558         serial = rbtversion->serial;
5559         now = 0;
5560
5561         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5562                   isc_rwlocktype_read);
5563
5564         found = NULL;
5565         foundsig = NULL;
5566         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5567         if (covers == 0)
5568                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5569         else
5570                 sigmatchtype = 0;
5571
5572         for (header = rbtnode->data; header != NULL; header = header_next) {
5573                 header_next = header->next;
5574                 do {
5575                         if (header->serial <= serial &&
5576                             !IGNORE(header)) {
5577                                 /*
5578                                  * Is this a "this rdataset doesn't
5579                                  * exist" record?
5580                                  */
5581                                 if (NONEXISTENT(header))
5582                                         header = NULL;
5583                                 break;
5584                         } else
5585                                 header = header->down;
5586                 } while (header != NULL);
5587                 if (header != NULL) {
5588                         /*
5589                          * We have an active, extant rdataset.  If it's a
5590                          * type we're looking for, remember it.
5591                          */
5592                         if (header->type == matchtype) {
5593                                 found = header;
5594                                 if (foundsig != NULL)
5595                                         break;
5596                         } else if (header->type == sigmatchtype) {
5597                                 foundsig = header;
5598                                 if (found != NULL)
5599                                         break;
5600                         }
5601                 }
5602         }
5603         if (found != NULL) {
5604                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5605                 if (foundsig != NULL)
5606                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5607                                       sigrdataset);
5608         }
5609
5610         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5611                     isc_rwlocktype_read);
5612
5613         if (close_version)
5614                 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5615                              ISC_FALSE);
5616
5617         if (found == NULL)
5618                 return (ISC_R_NOTFOUND);
5619
5620         return (ISC_R_SUCCESS);
5621 }
5622
5623 static isc_result_t
5624 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5625                    dns_rdatatype_t type, dns_rdatatype_t covers,
5626                    isc_stdtime_t now, dns_rdataset_t *rdataset,
5627                    dns_rdataset_t *sigrdataset)
5628 {
5629         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5630         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5631         rdatasetheader_t *header, *header_next, *found, *foundsig;
5632         rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5633         isc_result_t result;
5634         nodelock_t *lock;
5635         isc_rwlocktype_t locktype;
5636
5637         REQUIRE(VALID_RBTDB(rbtdb));
5638         REQUIRE(type != dns_rdatatype_any);
5639
5640         UNUSED(version);
5641
5642         result = ISC_R_SUCCESS;
5643
5644         if (now == 0)
5645                 isc_stdtime_get(&now);
5646
5647         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5648         locktype = isc_rwlocktype_read;
5649         NODE_LOCK(lock, locktype);
5650
5651         found = NULL;
5652         foundsig = NULL;
5653         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5654         negtype = RBTDB_RDATATYPE_VALUE(0, type);
5655         if (covers == 0)
5656                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5657         else
5658                 sigmatchtype = 0;
5659
5660         for (header = rbtnode->data; header != NULL; header = header_next) {
5661                 header_next = header->next;
5662                 if (header->rdh_ttl <= now) {
5663                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5664                             (locktype == isc_rwlocktype_write ||
5665                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5666                                 /*
5667                                  * We update the node's status only when we
5668                                  * can get write access.
5669                                  */
5670                                 locktype = isc_rwlocktype_write;
5671
5672                                 /*
5673                                  * We don't check if refcurrent(rbtnode) == 0
5674                                  * and try to free like we do in cache_find(),
5675                                  * because refcurrent(rbtnode) must be
5676                                  * non-zero.  This is so because 'node' is an
5677                                  * argument to the function.
5678                                  */
5679                                 header->attributes |= RDATASET_ATTR_STALE;
5680                                 rbtnode->dirty = 1;
5681                         }
5682                 } else if (EXISTS(header)) {
5683                         if (header->type == matchtype)
5684                                 found = header;
5685                         else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5686                                  header->type == negtype)
5687                                 found = header;
5688                         else if (header->type == sigmatchtype)
5689                                 foundsig = header;
5690                 }
5691         }
5692         if (found != NULL) {
5693                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5694                 if (!NEGATIVE(found) && foundsig != NULL)
5695                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5696                                       sigrdataset);
5697         }
5698
5699         NODE_UNLOCK(lock, locktype);
5700
5701         if (found == NULL)
5702                 return (ISC_R_NOTFOUND);
5703
5704         if (NEGATIVE(found)) {
5705                 /*
5706                  * We found a negative cache entry.
5707                  */
5708                 if (NXDOMAIN(found))
5709                         result = DNS_R_NCACHENXDOMAIN;
5710                 else
5711                         result = DNS_R_NCACHENXRRSET;
5712         }
5713
5714         return (result);
5715 }
5716
5717 static isc_result_t
5718 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5719              isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5720 {
5721         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5722         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5723         rbtdb_version_t *rbtversion = version;
5724         rbtdb_rdatasetiter_t *iterator;
5725         unsigned int refs;
5726
5727         REQUIRE(VALID_RBTDB(rbtdb));
5728
5729         iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5730         if (iterator == NULL)
5731                 return (ISC_R_NOMEMORY);
5732
5733         if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5734                 now = 0;
5735                 if (rbtversion == NULL)
5736                         currentversion(db,
5737                                  (dns_dbversion_t **) (void *)(&rbtversion));
5738                 else {
5739                         unsigned int refs;
5740
5741                         INSIST(rbtversion->rbtdb == rbtdb);
5742
5743                         isc_refcount_increment(&rbtversion->references,
5744                                                &refs);
5745                         INSIST(refs > 1);
5746                 }
5747         } else {
5748                 if (now == 0)
5749                         isc_stdtime_get(&now);
5750                 rbtversion = NULL;
5751         }
5752
5753         iterator->common.magic = DNS_RDATASETITER_MAGIC;
5754         iterator->common.methods = &rdatasetiter_methods;
5755         iterator->common.db = db;
5756         iterator->common.node = node;
5757         iterator->common.version = (dns_dbversion_t *)rbtversion;
5758         iterator->common.now = now;
5759
5760         NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5761
5762         dns_rbtnode_refincrement(rbtnode, &refs);
5763         INSIST(refs != 0);
5764
5765         iterator->current = NULL;
5766
5767         NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5768
5769         *iteratorp = (dns_rdatasetiter_t *)iterator;
5770
5771         return (ISC_R_SUCCESS);
5772 }
5773
5774 static isc_boolean_t
5775 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5776         rdatasetheader_t *header, *header_next;
5777         isc_boolean_t cname, other_data;
5778         dns_rdatatype_t rdtype;
5779
5780         /*
5781          * The caller must hold the node lock.
5782          */
5783
5784         /*
5785          * Look for CNAME and "other data" rdatasets active in our version.
5786          */
5787         cname = ISC_FALSE;
5788         other_data = ISC_FALSE;
5789         for (header = node->data; header != NULL; header = header_next) {
5790                 header_next = header->next;
5791                 if (header->type == dns_rdatatype_cname) {
5792                         /*
5793                          * Look for an active extant CNAME.
5794                          */
5795                         do {
5796                                 if (header->serial <= serial &&
5797                                     !IGNORE(header)) {
5798                                         /*
5799                                          * Is this a "this rdataset doesn't
5800                                          * exist" record?
5801                                          */
5802                                         if (NONEXISTENT(header))
5803                                                 header = NULL;
5804                                         break;
5805                                 } else
5806                                         header = header->down;
5807                         } while (header != NULL);
5808                         if (header != NULL)
5809                                 cname = ISC_TRUE;
5810                 } else {
5811                         /*
5812                          * Look for active extant "other data".
5813                          *
5814                          * "Other data" is any rdataset whose type is not
5815                          * KEY, NSEC, SIG or RRSIG.
5816                          */
5817                         rdtype = RBTDB_RDATATYPE_BASE(header->type);
5818                         if (rdtype != dns_rdatatype_key &&
5819                             rdtype != dns_rdatatype_sig &&
5820                             rdtype != dns_rdatatype_nsec &&
5821                             rdtype != dns_rdatatype_rrsig) {
5822                                 /*
5823                                  * Is it active and extant?
5824                                  */
5825                                 do {
5826                                         if (header->serial <= serial &&
5827                                             !IGNORE(header)) {
5828                                                 /*
5829                                                  * Is this a "this rdataset
5830                                                  * doesn't exist" record?
5831                                                  */
5832                                                 if (NONEXISTENT(header))
5833                                                         header = NULL;
5834                                                 break;
5835                                         } else
5836                                                 header = header->down;
5837                                 } while (header != NULL);
5838                                 if (header != NULL)
5839                                         other_data = ISC_TRUE;
5840                         }
5841                 }
5842         }
5843
5844         if (cname && other_data)
5845                 return (ISC_TRUE);
5846
5847         return (ISC_FALSE);
5848 }
5849
5850 static isc_result_t
5851 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5852         isc_result_t result;
5853
5854         INSIST(!IS_CACHE(rbtdb));
5855         INSIST(newheader->heap_index == 0);
5856         INSIST(!ISC_LINK_LINKED(newheader, link));
5857
5858         result = isc_heap_insert(rbtdb->heaps[idx], newheader);
5859         return (result);
5860 }
5861
5862 static isc_result_t
5863 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5864     rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5865     dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5866 {
5867         rbtdb_changed_t *changed = NULL;
5868         rdatasetheader_t *topheader, *topheader_prev, *header, *sigheader;
5869         unsigned char *merged;
5870         isc_result_t result;
5871         isc_boolean_t header_nx;
5872         isc_boolean_t newheader_nx;
5873         isc_boolean_t merge;
5874         dns_rdatatype_t rdtype, covers;
5875         rbtdb_rdatatype_t negtype, sigtype;
5876         dns_trust_t trust;
5877         int idx;
5878
5879         /*
5880          * Add an rdatasetheader_t to a node.
5881          */
5882
5883         /*
5884          * Caller must be holding the node lock.
5885          */
5886
5887         if ((options & DNS_DBADD_MERGE) != 0) {
5888                 REQUIRE(rbtversion != NULL);
5889                 merge = ISC_TRUE;
5890         } else
5891                 merge = ISC_FALSE;
5892
5893         if ((options & DNS_DBADD_FORCE) != 0)
5894                 trust = dns_trust_ultimate;
5895         else
5896                 trust = newheader->trust;
5897
5898         if (rbtversion != NULL && !loading) {
5899                 /*
5900                  * We always add a changed record, even if no changes end up
5901                  * being made to this node, because it's harmless and
5902                  * simplifies the code.
5903                  */
5904                 changed = add_changed(rbtdb, rbtversion, rbtnode);
5905                 if (changed == NULL) {
5906                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5907                         return (ISC_R_NOMEMORY);
5908                 }
5909         }
5910
5911         newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5912         topheader_prev = NULL;
5913         sigheader = NULL;
5914         negtype = 0;
5915         if (rbtversion == NULL && !newheader_nx) {
5916                 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5917                 if (NEGATIVE(newheader)) {
5918                         /*
5919                          * We're adding a negative cache entry.
5920                          */
5921                         covers = RBTDB_RDATATYPE_EXT(newheader->type);
5922                         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
5923                                                         covers);
5924                         for (topheader = rbtnode->data;
5925                              topheader != NULL;
5926                              topheader = topheader->next) {
5927                                 /*
5928                                  * If we're adding a negative cache entry
5929                                  * which covers all types (NXDOMAIN,
5930                                  * NODATA(QTYPE=ANY)).
5931                                  *
5932                                  * We make all other data stale so that the
5933                                  * only rdataset that can be found at this
5934                                  * node is the negative cache entry.
5935                                  *
5936                                  * Otherwise look for any RRSIGs of the
5937                                  * given type so they can be marked stale
5938                                  * later.
5939                                  */
5940                                 if (covers == dns_rdatatype_any) {
5941                                         set_ttl(rbtdb, topheader, 0);
5942                                         topheader->attributes |=
5943                                                 RDATASET_ATTR_STALE;
5944                                         rbtnode->dirty = 1;
5945                                 } else if (topheader->type == sigtype)
5946                                         sigheader = topheader;
5947                         }
5948                         if (covers == dns_rdatatype_any)
5949                                 goto find_header;
5950                         negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5951                 } else {
5952                         /*
5953                          * We're adding something that isn't a
5954                          * negative cache entry.  Look for an extant
5955                          * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5956                          * cache entry.
5957                          */
5958                         for (topheader = rbtnode->data;
5959                              topheader != NULL;
5960                              topheader = topheader->next) {
5961                                 if (topheader->type ==
5962                                     RBTDB_RDATATYPE_NCACHEANY)
5963                                         break;
5964                         }
5965                         if (topheader != NULL && EXISTS(topheader) &&
5966                             topheader->rdh_ttl > now) {
5967                                 /*
5968                                  * Found one.
5969                                  */
5970                                 if (trust < topheader->trust) {
5971                                         /*
5972                                          * The NXDOMAIN/NODATA(QTYPE=ANY)
5973                                          * is more trusted.
5974                                          */
5975                                         free_rdataset(rbtdb,
5976                                                       rbtdb->common.mctx,
5977                                                       newheader);
5978                                         if (addedrdataset != NULL)
5979                                                 bind_rdataset(rbtdb, rbtnode,
5980                                                               topheader, now,
5981                                                               addedrdataset);
5982                                         return (DNS_R_UNCHANGED);
5983                                 }
5984                                 /*
5985                                  * The new rdataset is better.  Expire the
5986                                  * NXDOMAIN/NODATA(QTYPE=ANY).
5987                                  */
5988                                 set_ttl(rbtdb, topheader, 0);
5989                                 topheader->attributes |= RDATASET_ATTR_STALE;
5990                                 rbtnode->dirty = 1;
5991                                 topheader = NULL;
5992                                 goto find_header;
5993                         }
5994                         negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5995                 }
5996         }
5997
5998         for (topheader = rbtnode->data;
5999              topheader != NULL;
6000              topheader = topheader->next) {
6001                 if (topheader->type == newheader->type ||
6002                     topheader->type == negtype)
6003                         break;
6004                 topheader_prev = topheader;
6005         }
6006
6007  find_header:
6008         /*
6009          * If header isn't NULL, we've found the right type.  There may be
6010          * IGNORE rdatasets between the top of the chain and the first real
6011          * data.  We skip over them.
6012          */
6013         header = topheader;
6014         while (header != NULL && IGNORE(header))
6015                 header = header->down;
6016         if (header != NULL) {
6017                 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
6018
6019                 /*
6020                  * Deleting an already non-existent rdataset has no effect.
6021                  */
6022                 if (header_nx && newheader_nx) {
6023                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6024                         return (DNS_R_UNCHANGED);
6025                 }
6026
6027                 /*
6028                  * Trying to add an rdataset with lower trust to a cache DB
6029                  * has no effect, provided that the cache data isn't stale.
6030                  */
6031                 if (rbtversion == NULL && trust < header->trust &&
6032                     (header->rdh_ttl > now || header_nx)) {
6033                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6034                         if (addedrdataset != NULL)
6035                                 bind_rdataset(rbtdb, rbtnode, header, now,
6036                                               addedrdataset);
6037                         return (DNS_R_UNCHANGED);
6038                 }
6039
6040                 /*
6041                  * Don't merge if a nonexistent rdataset is involved.
6042                  */
6043                 if (merge && (header_nx || newheader_nx))
6044                         merge = ISC_FALSE;
6045
6046                 /*
6047                  * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
6048                  * that is the union of 'newheader' and 'header'.
6049                  */
6050                 if (merge) {
6051                         unsigned int flags = 0;
6052                         INSIST(rbtversion->serial >= header->serial);
6053                         merged = NULL;
6054                         result = ISC_R_SUCCESS;
6055
6056                         if ((options & DNS_DBADD_EXACT) != 0)
6057                                 flags |= DNS_RDATASLAB_EXACT;
6058                         if ((options & DNS_DBADD_EXACTTTL) != 0 &&
6059                              newheader->rdh_ttl != header->rdh_ttl)
6060                                         result = DNS_R_NOTEXACT;
6061                         else if (newheader->rdh_ttl != header->rdh_ttl)
6062                                 flags |= DNS_RDATASLAB_FORCE;
6063                         if (result == ISC_R_SUCCESS)
6064                                 result = dns_rdataslab_merge(
6065                                              (unsigned char *)header,
6066                                              (unsigned char *)newheader,
6067                                              (unsigned int)(sizeof(*newheader)),
6068                                              rbtdb->common.mctx,
6069                                              rbtdb->common.rdclass,
6070                                              (dns_rdatatype_t)header->type,
6071                                              flags, &merged);
6072                         if (result == ISC_R_SUCCESS) {
6073                                 /*
6074                                  * If 'header' has the same serial number as
6075                                  * we do, we could clean it up now if we knew
6076                                  * that our caller had no references to it.
6077                                  * We don't know this, however, so we leave it
6078                                  * alone.  It will get cleaned up when
6079                                  * clean_zone_node() runs.
6080                                  */
6081                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6082                                               newheader);
6083                                 newheader = (rdatasetheader_t *)merged;
6084                                 init_rdataset(rbtdb, newheader);
6085                                 if (loading && RESIGN(newheader) &&
6086                                     RESIGN(header) &&
6087                                     header->resign < newheader->resign)
6088                                         newheader->resign = header->resign;
6089                         } else {
6090                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6091                                               newheader);
6092                                 return (result);
6093                         }
6094                 }
6095                 /*
6096                  * Don't replace existing NS, A and AAAA RRsets
6097                  * in the cache if they already exist.  This
6098                  * prevents named being locked to old servers.
6099                  * Don't lower trust of existing record if the
6100                  * update is forced.
6101                  */
6102                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
6103                     header->type == dns_rdatatype_ns &&
6104                     !header_nx && !newheader_nx &&
6105                     header->trust >= newheader->trust &&
6106                     dns_rdataslab_equalx((unsigned char *)header,
6107                                          (unsigned char *)newheader,
6108                                          (unsigned int)(sizeof(*newheader)),
6109                                          rbtdb->common.rdclass,
6110                                          (dns_rdatatype_t)header->type)) {
6111                         /*
6112                          * Honour the new ttl if it is less than the
6113                          * older one.
6114                          */
6115                         if (header->rdh_ttl > newheader->rdh_ttl)
6116                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
6117                         if (header->noqname == NULL &&
6118                             newheader->noqname != NULL) {
6119                                 header->noqname = newheader->noqname;
6120                                 newheader->noqname = NULL;
6121                         }
6122                         if (header->closest == NULL &&
6123                             newheader->closest != NULL) {
6124                                 header->closest = newheader->closest;
6125                                 newheader->closest = NULL;
6126                         }
6127                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6128                         if (addedrdataset != NULL)
6129                                 bind_rdataset(rbtdb, rbtnode, header, now,
6130                                               addedrdataset);
6131                         return (ISC_R_SUCCESS);
6132                 }
6133                 /*
6134                  * If we will be replacing an NS RRset, force its TTL
6135                  * to be no more than the current NS RRset's TTL.  This
6136                  * ensures the delegations that are withdrawn are honoured.
6137                  */
6138                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
6139                     header->type == dns_rdatatype_ns &&
6140                     !header_nx && !newheader_nx &&
6141                     header->trust <= newheader->trust) {
6142                         if (newheader->rdh_ttl > header->rdh_ttl) {
6143                                 newheader->rdh_ttl = header->rdh_ttl;
6144                         }
6145                 }
6146                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
6147                     (header->type == dns_rdatatype_a ||
6148                      header->type == dns_rdatatype_aaaa) &&
6149                     !header_nx && !newheader_nx &&
6150                     header->trust >= newheader->trust &&
6151                     dns_rdataslab_equal((unsigned char *)header,
6152                                         (unsigned char *)newheader,
6153                                         (unsigned int)(sizeof(*newheader)))) {
6154                         /*
6155                          * Honour the new ttl if it is less than the
6156                          * older one.
6157                          */
6158                         if (header->rdh_ttl > newheader->rdh_ttl)
6159                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
6160                         if (header->noqname == NULL &&
6161                             newheader->noqname != NULL) {
6162                                 header->noqname = newheader->noqname;
6163                                 newheader->noqname = NULL;
6164                         }
6165                         if (header->closest == NULL &&
6166                             newheader->closest != NULL) {
6167                                 header->closest = newheader->closest;
6168                                 newheader->closest = NULL;
6169                         }
6170                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6171                         if (addedrdataset != NULL)
6172                                 bind_rdataset(rbtdb, rbtnode, header, now,
6173                                               addedrdataset);
6174                         return (ISC_R_SUCCESS);
6175                 }
6176                 INSIST(rbtversion == NULL ||
6177                        rbtversion->serial >= topheader->serial);
6178                 if (topheader_prev != NULL)
6179                         topheader_prev->next = newheader;
6180                 else
6181                         rbtnode->data = newheader;
6182                 newheader->next = topheader->next;
6183                 if (loading) {
6184                         /*
6185                          * There are no other references to 'header' when
6186                          * loading, so we MAY clean up 'header' now.
6187                          * Since we don't generate changed records when
6188                          * loading, we MUST clean up 'header' now.
6189                          */
6190                         newheader->down = NULL;
6191                         free_rdataset(rbtdb, rbtdb->common.mctx, header);
6192                 } else {
6193                         newheader->down = topheader;
6194                         topheader->next = newheader;
6195                         rbtnode->dirty = 1;
6196                         if (changed != NULL)
6197                                 changed->dirty = ISC_TRUE;
6198                         if (rbtversion == NULL) {
6199                                 set_ttl(rbtdb, header, 0);
6200                                 header->attributes |= RDATASET_ATTR_STALE;
6201                                 if (sigheader != NULL) {
6202                                         set_ttl(rbtdb, sigheader, 0);
6203                                         sigheader->attributes |=
6204                                                  RDATASET_ATTR_STALE;
6205                                 }
6206                         }
6207                         idx = newheader->node->locknum;
6208                         if (IS_CACHE(rbtdb)) {
6209                                 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
6210                                                  newheader, link);
6211                                 /*
6212                                  * XXXMLG We don't check the return value
6213                                  * here.  If it fails, we will not do TTL
6214                                  * based expiry on this node.  However, we
6215                                  * will do it on the LRU side, so memory
6216                                  * will not leak... for long.
6217                                  */
6218                                 isc_heap_insert(rbtdb->heaps[idx], newheader);
6219                         } else if (RESIGN(newheader))
6220                                 resign_insert(rbtdb, idx, newheader);
6221                 }
6222         } else {
6223                 /*
6224                  * No non-IGNORED rdatasets of the given type exist at
6225                  * this node.
6226                  */
6227
6228                 /*
6229                  * If we're trying to delete the type, don't bother.
6230                  */
6231                 if (newheader_nx) {
6232                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6233                         return (DNS_R_UNCHANGED);
6234                 }
6235
6236                 if (topheader != NULL) {
6237                         /*
6238                          * We have a list of rdatasets of the given type,
6239                          * but they're all marked IGNORE.  We simply insert
6240                          * the new rdataset at the head of the list.
6241                          *
6242                          * Ignored rdatasets cannot occur during loading, so
6243                          * we INSIST on it.
6244                          */
6245                         INSIST(!loading);
6246                         INSIST(rbtversion == NULL ||
6247                                rbtversion->serial >= topheader->serial);
6248                         if (topheader_prev != NULL)
6249                                 topheader_prev->next = newheader;
6250                         else
6251                                 rbtnode->data = newheader;
6252                         newheader->next = topheader->next;
6253                         newheader->down = topheader;
6254                         topheader->next = newheader;
6255                         rbtnode->dirty = 1;
6256                         if (changed != NULL)
6257                                 changed->dirty = ISC_TRUE;
6258                 } else {
6259                         /*
6260                          * No rdatasets of the given type exist at the node.
6261                          */
6262                         newheader->next = rbtnode->data;
6263                         newheader->down = NULL;
6264                         rbtnode->data = newheader;
6265                 }
6266                 idx = newheader->node->locknum;
6267                 if (IS_CACHE(rbtdb)) {
6268                         ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
6269                                          newheader, link);
6270                         isc_heap_insert(rbtdb->heaps[idx], newheader);
6271                 } else if (RESIGN(newheader)) {
6272                         resign_insert(rbtdb, idx, newheader);
6273                 }
6274         }
6275
6276         /*
6277          * Check if the node now contains CNAME and other data.
6278          */
6279         if (rbtversion != NULL &&
6280             cname_and_other_data(rbtnode, rbtversion->serial))
6281                 return (DNS_R_CNAMEANDOTHER);
6282
6283         if (addedrdataset != NULL)
6284                 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
6285
6286         return (ISC_R_SUCCESS);
6287 }
6288
6289 static inline isc_boolean_t
6290 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
6291                 rbtdb_rdatatype_t type)
6292 {
6293         if (IS_CACHE(rbtdb)) {
6294                 if (type == dns_rdatatype_dname)
6295                         return (ISC_TRUE);
6296                 else
6297                         return (ISC_FALSE);
6298         } else if (type == dns_rdatatype_dname ||
6299                    (type == dns_rdatatype_ns &&
6300                     (node != rbtdb->origin_node || IS_STUB(rbtdb))))
6301                 return (ISC_TRUE);
6302         return (ISC_FALSE);
6303 }
6304
6305 static inline isc_result_t
6306 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6307            dns_rdataset_t *rdataset)
6308 {
6309         struct noqname *noqname;
6310         isc_mem_t *mctx = rbtdb->common.mctx;
6311         dns_name_t name;
6312         dns_rdataset_t neg, negsig;
6313         isc_result_t result;
6314         isc_region_t r;
6315
6316         dns_name_init(&name, NULL);
6317         dns_rdataset_init(&neg);
6318         dns_rdataset_init(&negsig);
6319
6320         result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
6321         RUNTIME_CHECK(result == ISC_R_SUCCESS);
6322
6323         noqname = isc_mem_get(mctx, sizeof(*noqname));
6324         if (noqname == NULL) {
6325                 result = ISC_R_NOMEMORY;
6326                 goto cleanup;
6327         }
6328         dns_name_init(&noqname->name, NULL);
6329         noqname->neg = NULL;
6330         noqname->negsig = NULL;
6331         noqname->type = neg.type;
6332         result = dns_name_dup(&name, mctx, &noqname->name);
6333         if (result != ISC_R_SUCCESS)
6334                 goto cleanup;
6335         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6336         if (result != ISC_R_SUCCESS)
6337                 goto cleanup;
6338         noqname->neg = r.base;
6339         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6340         if (result != ISC_R_SUCCESS)
6341                 goto cleanup;
6342         noqname->negsig = r.base;
6343         dns_rdataset_disassociate(&neg);
6344         dns_rdataset_disassociate(&negsig);
6345         newheader->noqname = noqname;
6346         return (ISC_R_SUCCESS);
6347
6348 cleanup:
6349         dns_rdataset_disassociate(&neg);
6350         dns_rdataset_disassociate(&negsig);
6351         free_noqname(mctx, &noqname);
6352         return(result);
6353 }
6354
6355 static inline isc_result_t
6356 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6357            dns_rdataset_t *rdataset)
6358 {
6359         struct noqname *closest;
6360         isc_mem_t *mctx = rbtdb->common.mctx;
6361         dns_name_t name;
6362         dns_rdataset_t neg, negsig;
6363         isc_result_t result;
6364         isc_region_t r;
6365
6366         dns_name_init(&name, NULL);
6367         dns_rdataset_init(&neg);
6368         dns_rdataset_init(&negsig);
6369
6370         result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
6371         RUNTIME_CHECK(result == ISC_R_SUCCESS);
6372
6373         closest = isc_mem_get(mctx, sizeof(*closest));
6374         if (closest == NULL) {
6375                 result = ISC_R_NOMEMORY;
6376                 goto cleanup;
6377         }
6378         dns_name_init(&closest->name, NULL);
6379         closest->neg = NULL;
6380         closest->negsig = NULL;
6381         closest->type = neg.type;
6382         result = dns_name_dup(&name, mctx, &closest->name);
6383         if (result != ISC_R_SUCCESS)
6384                 goto cleanup;
6385         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6386         if (result != ISC_R_SUCCESS)
6387                 goto cleanup;
6388         closest->neg = r.base;
6389         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6390         if (result != ISC_R_SUCCESS)
6391                 goto cleanup;
6392         closest->negsig = r.base;
6393         dns_rdataset_disassociate(&neg);
6394         dns_rdataset_disassociate(&negsig);
6395         newheader->closest = closest;
6396         return (ISC_R_SUCCESS);
6397
6398  cleanup:
6399         dns_rdataset_disassociate(&neg);
6400         dns_rdataset_disassociate(&negsig);
6401         free_noqname(mctx, &closest);
6402         return(result);
6403 }
6404
/*
 * Database method table whose address the functions below compare against
 * rbtdb->common.methods before applying their NSEC3-placement REQUIRE
 * checks.  Only declared at this point; presumably it is the zone-database
 * method table and is initialized later in the file (not visible from
 * here -- confirm).
 */
6405 static dns_dbmethods_t zone_methods;
6406
6407 static isc_result_t
6408 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6409             isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
6410             dns_rdataset_t *addedrdataset)
6411 {
6412         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6413         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6414         rbtdb_version_t *rbtversion = version;
6415         isc_region_t region;
6416         rdatasetheader_t *newheader;
6417         rdatasetheader_t *header;
6418         isc_result_t result;
6419         isc_boolean_t delegating;
6420         isc_boolean_t newnsec;
6421         isc_boolean_t tree_locked = ISC_FALSE;
6422         isc_boolean_t cache_is_overmem = ISC_FALSE;
6423
6424         REQUIRE(VALID_RBTDB(rbtdb));
6425         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
6426
6427         if (rbtdb->common.methods == &zone_methods)
6428                 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6429                           (rdataset->type == dns_rdatatype_nsec3 ||
6430                            rdataset->covers == dns_rdatatype_nsec3)) ||
6431                          (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6432                            rdataset->type != dns_rdatatype_nsec3 &&
6433                            rdataset->covers != dns_rdatatype_nsec3)));
6434
6435         if (rbtversion == NULL) {
6436                 if (now == 0)
6437                         isc_stdtime_get(&now);
6438         } else
6439                 now = 0;
6440
6441         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6442                                             &region, sizeof(rdatasetheader_t));
6443         if (result != ISC_R_SUCCESS)
6444                 return (result);
6445
6446         newheader = (rdatasetheader_t *)region.base;
6447         init_rdataset(rbtdb, newheader);
6448         set_ttl(rbtdb, newheader, rdataset->ttl + now);
6449         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6450                                                 rdataset->covers);
6451         newheader->attributes = 0;
6452         newheader->noqname = NULL;
6453         newheader->closest = NULL;
6454         newheader->count = init_count++;
6455         newheader->trust = rdataset->trust;
6456         newheader->additional_auth = NULL;
6457         newheader->additional_glue = NULL;
6458         newheader->last_used = now;
6459         newheader->node = rbtnode;
6460         if (rbtversion != NULL) {
6461                 newheader->serial = rbtversion->serial;
6462                 now = 0;
6463
6464                 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6465                         newheader->attributes |= RDATASET_ATTR_RESIGN;
6466                         newheader->resign = rdataset->resign;
6467                 } else
6468                         newheader->resign = 0;
6469         } else {
6470                 newheader->serial = 1;
6471                 newheader->resign = 0;
6472                 if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0)
6473                         newheader->attributes |= RDATASET_ATTR_NEGATIVE;
6474                 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
6475                         newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6476                 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6477                         newheader->attributes |= RDATASET_ATTR_OPTOUT;
6478                 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6479                         result = addnoqname(rbtdb, newheader, rdataset);
6480                         if (result != ISC_R_SUCCESS) {
6481                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6482                                               newheader);
6483                                 return (result);
6484                         }
6485                 }
6486                 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6487                         result = addclosest(rbtdb, newheader, rdataset);
6488                         if (result != ISC_R_SUCCESS) {
6489                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6490                                               newheader);
6491                                 return (result);
6492                         }
6493                 }
6494         }
6495
6496         /*
6497          * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6498          * just DNAME for the cache), then we need to set the callback bit
6499          * on the node.
6500          */
6501         if (delegating_type(rbtdb, rbtnode, rdataset->type))
6502                 delegating = ISC_TRUE;
6503         else
6504                 delegating = ISC_FALSE;
6505
6506         /*
6507          * Add to the auxiliary NSEC tree if we're adding an NSEC record.
6508          */
6509         if (rbtnode->nsec != DNS_RBT_NSEC_HAS_NSEC &&
6510             rdataset->type == dns_rdatatype_nsec)
6511                 newnsec = ISC_TRUE;
6512         else
6513                 newnsec = ISC_FALSE;
6514
6515         /*
6516          * If we're adding a delegation type, adding to the auxiliary NSEC tree,
6517          * or the DB is a cache in an overmem state, hold an exclusive lock on
6518          * the tree.  In the latter case the lock does not necessarily have to
6519          * be acquired but it will help purge stale entries more effectively.
6520          */
6521         if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx))
6522                 cache_is_overmem = ISC_TRUE;
6523         if (delegating || newnsec || cache_is_overmem) {
6524                 tree_locked = ISC_TRUE;
6525                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6526         }
6527
6528         if (cache_is_overmem)
6529                 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6530
6531         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6532                   isc_rwlocktype_write);
6533
6534         if (rbtdb->rrsetstats != NULL) {
6535                 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6536                 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6537         }
6538
6539         if (IS_CACHE(rbtdb)) {
6540                 if (tree_locked)
6541                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6542
6543                 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6544                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6545                         expire_header(rbtdb, header, tree_locked);
6546
6547                 /*
6548                  * If we've been holding a write lock on the tree just for
6549                  * cleaning, we can release it now.  However, we still need the
6550                  * node lock.
6551                  */
6552                 if (tree_locked && !delegating && !newnsec) {
6553                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6554                         tree_locked = ISC_FALSE;
6555                 }
6556         }
6557
6558         result = ISC_R_SUCCESS;
6559         if (newnsec) {
6560                 dns_fixedname_t fname;
6561                 dns_name_t *name;
6562                 dns_rbtnode_t *nsecnode;
6563
6564                 dns_fixedname_init(&fname);
6565                 name = dns_fixedname_name(&fname);
6566                 dns_rbt_fullnamefromnode(rbtnode, name);
6567                 nsecnode = NULL;
6568                 result = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
6569                 if (result == ISC_R_SUCCESS) {
6570                         nsecnode->nsec = DNS_RBT_NSEC_NSEC;
6571                         rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6572                 } else if (result == ISC_R_EXISTS) {
6573                         rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6574                         result = ISC_R_SUCCESS;
6575                 }
6576         }
6577
6578         if (result == ISC_R_SUCCESS)
6579                 result = add(rbtdb, rbtnode, rbtversion, newheader, options,
6580                              ISC_FALSE, addedrdataset, now);
6581         if (result == ISC_R_SUCCESS && delegating)
6582                 rbtnode->find_callback = 1;
6583
6584         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6585                     isc_rwlocktype_write);
6586
6587         if (tree_locked)
6588                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6589
6590         /*
6591          * Update the zone's secure status.  If version is non-NULL
6592          * this is deferred until closeversion() is called.
6593          */
6594         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6595                 iszonesecure(db, version, rbtdb->origin_node);
6596
6597         return (result);
6598 }
6599
6600 static isc_result_t
6601 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6602                  dns_rdataset_t *rdataset, unsigned int options,
6603                  dns_rdataset_t *newrdataset)
6604 {
6605         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6606         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6607         rbtdb_version_t *rbtversion = version;
6608         rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6609         unsigned char *subresult;
6610         isc_region_t region;
6611         isc_result_t result;
6612         rbtdb_changed_t *changed;
6613
6614         REQUIRE(VALID_RBTDB(rbtdb));
6615         REQUIRE(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
6616
6617         if (rbtdb->common.methods == &zone_methods)
6618                 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6619                           (rdataset->type == dns_rdatatype_nsec3 ||
6620                            rdataset->covers == dns_rdatatype_nsec3)) ||
6621                          (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6622                            rdataset->type != dns_rdatatype_nsec3 &&
6623                            rdataset->covers != dns_rdatatype_nsec3)));
6624
6625         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6626                                             &region,
6627                                             sizeof(rdatasetheader_t));
6628         if (result != ISC_R_SUCCESS)
6629                 return (result);
6630         newheader = (rdatasetheader_t *)region.base;
6631         init_rdataset(rbtdb, newheader);
6632         set_ttl(rbtdb, newheader, rdataset->ttl);
6633         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6634                                                 rdataset->covers);
6635         newheader->attributes = 0;
6636         newheader->serial = rbtversion->serial;
6637         newheader->trust = 0;
6638         newheader->noqname = NULL;
6639         newheader->closest = NULL;
6640         newheader->count = init_count++;
6641         newheader->additional_auth = NULL;
6642         newheader->additional_glue = NULL;
6643         newheader->last_used = 0;
6644         newheader->node = rbtnode;
6645         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6646                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6647                 newheader->resign = rdataset->resign;
6648         } else
6649                 newheader->resign = 0;
6650
6651         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6652                   isc_rwlocktype_write);
6653
6654         changed = add_changed(rbtdb, rbtversion, rbtnode);
6655         if (changed == NULL) {
6656                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6657                 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6658                             isc_rwlocktype_write);
6659                 return (ISC_R_NOMEMORY);
6660         }
6661
6662         topheader_prev = NULL;
6663         for (topheader = rbtnode->data;
6664              topheader != NULL;
6665              topheader = topheader->next) {
6666                 if (topheader->type == newheader->type)
6667                         break;
6668                 topheader_prev = topheader;
6669         }
6670         /*
6671          * If header isn't NULL, we've found the right type.  There may be
6672          * IGNORE rdatasets between the top of the chain and the first real
6673          * data.  We skip over them.
6674          */
6675         header = topheader;
6676         while (header != NULL && IGNORE(header))
6677                 header = header->down;
6678         if (header != NULL && EXISTS(header)) {
6679                 unsigned int flags = 0;
6680                 subresult = NULL;
6681                 result = ISC_R_SUCCESS;
6682                 if ((options & DNS_DBSUB_EXACT) != 0) {
6683                         flags |= DNS_RDATASLAB_EXACT;
6684                         if (newheader->rdh_ttl != header->rdh_ttl)
6685                                 result = DNS_R_NOTEXACT;
6686                 }
6687                 if (result == ISC_R_SUCCESS)
6688                         result = dns_rdataslab_subtract(
6689                                         (unsigned char *)header,
6690                                         (unsigned char *)newheader,
6691                                         (unsigned int)(sizeof(*newheader)),
6692                                         rbtdb->common.mctx,
6693                                         rbtdb->common.rdclass,
6694                                         (dns_rdatatype_t)header->type,
6695                                         flags, &subresult);
6696                 if (result == ISC_R_SUCCESS) {
6697                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6698                         newheader = (rdatasetheader_t *)subresult;
6699                         init_rdataset(rbtdb, newheader);
6700                         /*
6701                          * We have to set the serial since the rdataslab
6702                          * subtraction routine copies the reserved portion of
6703                          * header, not newheader.
6704                          */
6705                         newheader->serial = rbtversion->serial;
6706                         /*
6707                          * XXXJT: dns_rdataslab_subtract() copied the pointers
6708                          * to additional info.  We need to clear these fields
6709                          * to avoid having duplicated references.
6710                          */
6711                         newheader->additional_auth = NULL;
6712                         newheader->additional_glue = NULL;
6713                 } else if (result == DNS_R_NXRRSET) {
6714                         /*
6715                          * This subtraction would remove all of the rdata;
6716                          * add a nonexistent header instead.
6717                          */
6718                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6719                         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6720                         if (newheader == NULL) {
6721                                 result = ISC_R_NOMEMORY;
6722                                 goto unlock;
6723                         }
6724                         set_ttl(rbtdb, newheader, 0);
6725                         newheader->type = topheader->type;
6726                         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6727                         newheader->trust = 0;
6728                         newheader->serial = rbtversion->serial;
6729                         newheader->noqname = NULL;
6730                         newheader->closest = NULL;
6731                         newheader->count = 0;
6732                         newheader->additional_auth = NULL;
6733                         newheader->additional_glue = NULL;
6734                         newheader->node = rbtnode;
6735                         newheader->resign = 0;
6736                         newheader->last_used = 0;
6737                 } else {
6738                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6739                         goto unlock;
6740                 }
6741
6742                 /*
6743                  * If we're here, we want to link newheader in front of
6744                  * topheader.
6745                  */
6746                 INSIST(rbtversion->serial >= topheader->serial);
6747                 if (topheader_prev != NULL)
6748                         topheader_prev->next = newheader;
6749                 else
6750                         rbtnode->data = newheader;
6751                 newheader->next = topheader->next;
6752                 newheader->down = topheader;
6753                 topheader->next = newheader;
6754                 rbtnode->dirty = 1;
6755                 changed->dirty = ISC_TRUE;
6756         } else {
6757                 /*
6758                  * The rdataset doesn't exist, so we don't need to do anything
6759                  * to satisfy the deletion request.
6760                  */
6761                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6762                 if ((options & DNS_DBSUB_EXACT) != 0)
6763                         result = DNS_R_NOTEXACT;
6764                 else
6765                         result = DNS_R_UNCHANGED;
6766         }
6767
6768         if (result == ISC_R_SUCCESS && newrdataset != NULL)
6769                 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6770
6771  unlock:
6772         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6773                     isc_rwlocktype_write);
6774
6775         /*
6776          * Update the zone's secure status.  If version is non-NULL
6777          * this is deferred until closeversion() is called.
6778          */
6779         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6780                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6781
6782         return (result);
6783 }
6784
6785 static isc_result_t
6786 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6787                dns_rdatatype_t type, dns_rdatatype_t covers)
6788 {
6789         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6790         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6791         rbtdb_version_t *rbtversion = version;
6792         isc_result_t result;
6793         rdatasetheader_t *newheader;
6794
6795         REQUIRE(VALID_RBTDB(rbtdb));
6796         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
6797
6798         if (type == dns_rdatatype_any)
6799                 return (ISC_R_NOTIMPLEMENTED);
6800         if (type == dns_rdatatype_rrsig && covers == 0)
6801                 return (ISC_R_NOTIMPLEMENTED);
6802
6803         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6804         if (newheader == NULL)
6805                 return (ISC_R_NOMEMORY);
6806         set_ttl(rbtdb, newheader, 0);
6807         newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6808         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6809         newheader->trust = 0;
6810         newheader->noqname = NULL;
6811         newheader->closest = NULL;
6812         newheader->additional_auth = NULL;
6813         newheader->additional_glue = NULL;
6814         if (rbtversion != NULL)
6815                 newheader->serial = rbtversion->serial;
6816         else
6817                 newheader->serial = 0;
6818         newheader->count = 0;
6819         newheader->last_used = 0;
6820         newheader->node = rbtnode;
6821
6822         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6823                   isc_rwlocktype_write);
6824
6825         result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6826                      ISC_FALSE, NULL, 0);
6827
6828         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6829                     isc_rwlocktype_write);
6830
6831         /*
6832          * Update the zone's secure status.  If version is non-NULL
6833          * this is deferred until closeversion() is called.
6834          */
6835         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6836                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6837
6838         return (result);
6839 }
6840
6841 /*
6842  * load a non-NSEC3 node in the main tree and optionally to the auxiliary NSEC
6843  */
6844 static isc_result_t
6845 loadnode(dns_rbtdb_t *rbtdb, dns_name_t *name, dns_rbtnode_t **nodep,
6846          isc_boolean_t hasnsec)
6847 {
6848         isc_result_t noderesult, nsecresult;
6849         dns_rbtnode_t *nsecnode;
6850
6851         noderesult = dns_rbt_addnode(rbtdb->tree, name, nodep);
6852
6853 #ifdef BIND9
6854         if (noderesult == ISC_R_SUCCESS)
6855                 dns_rpz_cidr_addip(rbtdb->rpz_cidr, name);
6856 #endif
6857
6858         if (!hasnsec)
6859                 return (noderesult);
6860         if (noderesult == ISC_R_EXISTS) {
6861                 /*
6862                  * Add a node to the auxiliary NSEC tree for an old node
6863                  * just now getting an NSEC record.
6864                  */
6865                 if ((*nodep)->nsec == DNS_RBT_NSEC_HAS_NSEC)
6866                         return (noderesult);
6867         } else if (noderesult != ISC_R_SUCCESS) {
6868                 return (noderesult);
6869         }
6870
6871         /*
6872          * Build the auxiliary tree for NSECs as we go.
6873          * This tree speeds searches for closest NSECs that would otherwise
6874          * need to examine many irrelevant nodes in large TLDs.
6875          *
6876          * Add nodes to the auxiliary tree after corresponding nodes have
6877          * been added to the main tree.
6878          */
6879         nsecnode = NULL;
6880         nsecresult = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
6881         if (nsecresult == ISC_R_SUCCESS) {
6882                 nsecnode->nsec = DNS_RBT_NSEC_NSEC;
6883                 (*nodep)->nsec = DNS_RBT_NSEC_HAS_NSEC;
6884                 return (noderesult);
6885         }
6886
6887         if (nsecresult == ISC_R_EXISTS) {
6888 #if 1 /* 0 */
6889                 isc_log_write(dns_lctx,
6890                               DNS_LOGCATEGORY_DATABASE,
6891                               DNS_LOGMODULE_CACHE,
6892                               ISC_LOG_WARNING,
6893                               "addnode: NSEC node already exists");
6894 #endif
6895                 (*nodep)->nsec = DNS_RBT_NSEC_HAS_NSEC;
6896                 return (noderesult);
6897         }
6898
6899         nsecresult = dns_rbt_deletenode(rbtdb->tree, *nodep, ISC_FALSE);
6900         if (nsecresult != ISC_R_SUCCESS)
6901                 isc_log_write(dns_lctx,
6902                               DNS_LOGCATEGORY_DATABASE,
6903                               DNS_LOGMODULE_CACHE,
6904                               ISC_LOG_WARNING,
6905                               "loading_addrdataset: "
6906                               "dns_rbt_deletenode: %s after "
6907                               "dns_rbt_addnode(NSEC): %s",
6908                               isc_result_totext(nsecresult),
6909                               isc_result_totext(noderesult));
6910         return (noderesult);
6911 }
6912
6913 static isc_result_t
6914 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6915         rbtdb_load_t *loadctx = arg;
6916         dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6917         dns_rbtnode_t *node;
6918         isc_result_t result;
6919         isc_region_t region;
6920         rdatasetheader_t *newheader;
6921
6922         /*
6923          * This routine does no node locking.  See comments in
6924          * 'load' below for more information on loading and
6925          * locking.
6926          */
6927
6928
6929         /*
6930          * SOA records are only allowed at top of zone.
6931          */
6932         if (rdataset->type == dns_rdatatype_soa &&
6933             !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6934                 return (DNS_R_NOTZONETOP);
6935
6936         if (rdataset->type != dns_rdatatype_nsec3 &&
6937             rdataset->covers != dns_rdatatype_nsec3)
6938                 add_empty_wildcards(rbtdb, name);
6939
6940         if (dns_name_iswildcard(name)) {
6941                 /*
6942                  * NS record owners cannot legally be wild cards.
6943                  */
6944                 if (rdataset->type == dns_rdatatype_ns)
6945                         return (DNS_R_INVALIDNS);
6946                 /*
6947                  * NSEC3 record owners cannot legally be wild cards.
6948                  */
6949                 if (rdataset->type == dns_rdatatype_nsec3)
6950                         return (DNS_R_INVALIDNSEC3);
6951                 result = add_wildcard_magic(rbtdb, name);
6952                 if (result != ISC_R_SUCCESS)
6953                         return (result);
6954         }
6955
6956         node = NULL;
6957         if (rdataset->type == dns_rdatatype_nsec3 ||
6958             rdataset->covers == dns_rdatatype_nsec3) {
6959                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6960                 if (result == ISC_R_SUCCESS)
6961                         node->nsec = DNS_RBT_NSEC_NSEC3;
6962         } else if (rdataset->type == dns_rdatatype_nsec) {
6963                 result = loadnode(rbtdb, name, &node, ISC_TRUE);
6964         } else {
6965                 result = loadnode(rbtdb, name, &node, ISC_FALSE);
6966         }
6967         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
6968                 return (result);
6969         if (result == ISC_R_SUCCESS) {
6970                 dns_name_t foundname;
6971                 dns_name_init(&foundname, NULL);
6972                 dns_rbt_namefromnode(node, &foundname);
6973 #ifdef DNS_RBT_USEHASH
6974                 node->locknum = node->hashval % rbtdb->node_lock_count;
6975 #else
6976                 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6977                         rbtdb->node_lock_count;
6978 #endif
6979         }
6980
6981         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6982                                             &region,
6983                                             sizeof(rdatasetheader_t));
6984         if (result != ISC_R_SUCCESS)
6985                 return (result);
6986         newheader = (rdatasetheader_t *)region.base;
6987         init_rdataset(rbtdb, newheader);
6988         set_ttl(rbtdb, newheader,
6989                 rdataset->ttl + loadctx->now); /* XXX overflow check */
6990         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6991                                                 rdataset->covers);
6992         newheader->attributes = 0;
6993         newheader->trust = rdataset->trust;
6994         newheader->serial = 1;
6995         newheader->noqname = NULL;
6996         newheader->closest = NULL;
6997         newheader->count = init_count++;
6998         newheader->additional_auth = NULL;
6999         newheader->additional_glue = NULL;
7000         newheader->last_used = 0;
7001         newheader->node = node;
7002         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
7003                 newheader->attributes |= RDATASET_ATTR_RESIGN;
7004                 newheader->resign = rdataset->resign;
7005         } else
7006                 newheader->resign = 0;
7007
7008         result = add(rbtdb, node, rbtdb->current_version, newheader,
7009                      DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
7010         if (result == ISC_R_SUCCESS &&
7011             delegating_type(rbtdb, node, rdataset->type))
7012                 node->find_callback = 1;
7013         else if (result == DNS_R_UNCHANGED)
7014                 result = ISC_R_SUCCESS;
7015
7016         return (result);
7017 }
7018
7019 static isc_result_t
7020 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
7021         rbtdb_load_t *loadctx;
7022         dns_rbtdb_t *rbtdb;
7023
7024         rbtdb = (dns_rbtdb_t *)db;
7025
7026         REQUIRE(VALID_RBTDB(rbtdb));
7027
7028         loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
7029         if (loadctx == NULL)
7030                 return (ISC_R_NOMEMORY);
7031
7032         loadctx->rbtdb = rbtdb;
7033         if (IS_CACHE(rbtdb))
7034                 isc_stdtime_get(&loadctx->now);
7035         else
7036                 loadctx->now = 0;
7037
7038         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7039
7040         REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
7041                 == 0);
7042         rbtdb->attributes |= RBTDB_ATTR_LOADING;
7043
7044         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7045
7046         *addp = loading_addrdataset;
7047         *dbloadp = loadctx;
7048
7049         return (ISC_R_SUCCESS);
7050 }
7051
7052 static isc_result_t
7053 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
7054         rbtdb_load_t *loadctx;
7055         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7056
7057         REQUIRE(VALID_RBTDB(rbtdb));
7058         REQUIRE(dbloadp != NULL);
7059         loadctx = *dbloadp;
7060         REQUIRE(loadctx->rbtdb == rbtdb);
7061
7062         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7063
7064         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
7065         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
7066
7067         rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
7068         rbtdb->attributes |= RBTDB_ATTR_LOADED;
7069
7070         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7071
7072         /*
7073          * If there's a KEY rdataset at the zone origin containing a
7074          * zone key, we consider the zone secure.
7075          */
7076         if (! IS_CACHE(rbtdb))
7077                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
7078
7079         *dbloadp = NULL;
7080
7081         isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
7082
7083         return (ISC_R_SUCCESS);
7084 }
7085
7086 static isc_result_t
7087 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
7088      dns_masterformat_t masterformat) {
7089         dns_rbtdb_t *rbtdb;
7090         rbtdb_version_t *rbtversion = version;
7091
7092         rbtdb = (dns_rbtdb_t *)db;
7093
7094         REQUIRE(VALID_RBTDB(rbtdb));
7095         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
7096
7097 #ifdef BIND9
7098         return (dns_master_dump2(rbtdb->common.mctx, db, version,
7099                                  &dns_master_style_default,
7100                                  filename, masterformat));
7101 #else
7102         UNUSED(version);
7103         UNUSED(filename);
7104         UNUSED(masterformat);
7105
7106         return (ISC_R_NOTIMPLEMENTED);
7107 #endif /* BIND9 */
7108 }
7109
7110 static void
7111 delete_callback(void *data, void *arg) {
7112         dns_rbtdb_t *rbtdb = arg;
7113         rdatasetheader_t *current, *next;
7114         unsigned int locknum;
7115
7116         current = data;
7117         locknum = current->node->locknum;
7118         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
7119         while (current != NULL) {
7120                 next = current->next;
7121                 free_rdataset(rbtdb, rbtdb->common.mctx, current);
7122                 current = next;
7123         }
7124         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
7125 }
7126
7127 static isc_boolean_t
7128 issecure(dns_db_t *db) {
7129         dns_rbtdb_t *rbtdb;
7130         isc_boolean_t secure;
7131
7132         rbtdb = (dns_rbtdb_t *)db;
7133
7134         REQUIRE(VALID_RBTDB(rbtdb));
7135
7136         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7137         secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
7138         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7139
7140         return (secure);
7141 }
7142
7143 static isc_boolean_t
7144 isdnssec(dns_db_t *db) {
7145         dns_rbtdb_t *rbtdb;
7146         isc_boolean_t dnssec;
7147
7148         rbtdb = (dns_rbtdb_t *)db;
7149
7150         REQUIRE(VALID_RBTDB(rbtdb));
7151
7152         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7153         dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
7154         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7155
7156         return (dnssec);
7157 }
7158
7159 static unsigned int
7160 nodecount(dns_db_t *db) {
7161         dns_rbtdb_t *rbtdb;
7162         unsigned int count;
7163
7164         rbtdb = (dns_rbtdb_t *)db;
7165
7166         REQUIRE(VALID_RBTDB(rbtdb));
7167
7168         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7169         count = dns_rbt_nodecount(rbtdb->tree);
7170         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7171
7172         return (count);
7173 }
7174
7175 static void
7176 settask(dns_db_t *db, isc_task_t *task) {
7177         dns_rbtdb_t *rbtdb;
7178
7179         rbtdb = (dns_rbtdb_t *)db;
7180
7181         REQUIRE(VALID_RBTDB(rbtdb));
7182
7183         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7184         if (rbtdb->task != NULL)
7185                 isc_task_detach(&rbtdb->task);
7186         if (task != NULL)
7187                 isc_task_attach(task, &rbtdb->task);
7188         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7189 }
7190
7191 static isc_boolean_t
7192 ispersistent(dns_db_t *db) {
7193         UNUSED(db);
7194         return (ISC_FALSE);
7195 }
7196
7197 static isc_result_t
7198 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
7199         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7200         dns_rbtnode_t *onode;
7201         isc_result_t result = ISC_R_SUCCESS;
7202
7203         REQUIRE(VALID_RBTDB(rbtdb));
7204         REQUIRE(nodep != NULL && *nodep == NULL);
7205
7206         /* Note that the access to origin_node doesn't require a DB lock */
7207         onode = (dns_rbtnode_t *)rbtdb->origin_node;
7208         if (onode != NULL) {
7209                 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
7210                 new_reference(rbtdb, onode);
7211                 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
7212
7213                 *nodep = rbtdb->origin_node;
7214         } else {
7215                 INSIST(IS_CACHE(rbtdb));
7216                 result = ISC_R_NOTFOUND;
7217         }
7218
7219         return (result);
7220 }
7221
7222 static isc_result_t
7223 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
7224                    isc_uint8_t *flags, isc_uint16_t *iterations,
7225                    unsigned char *salt, size_t *salt_length)
7226 {
7227         dns_rbtdb_t *rbtdb;
7228         isc_result_t result = ISC_R_NOTFOUND;
7229         rbtdb_version_t *rbtversion = version;
7230
7231         rbtdb = (dns_rbtdb_t *)db;
7232
7233         REQUIRE(VALID_RBTDB(rbtdb));
7234         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
7235
7236         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7237
7238         if (rbtversion == NULL)
7239                 rbtversion = rbtdb->current_version;
7240
7241         if (rbtversion->havensec3) {
7242                 if (hash != NULL)
7243                         *hash = rbtversion->hash;
7244                 if (salt != NULL && salt_length != NULL) {
7245                         REQUIRE(*salt_length >= rbtversion->salt_length);
7246                         memcpy(salt, rbtversion->salt, rbtversion->salt_length);
7247                 }
7248                 if (salt_length != NULL)
7249                         *salt_length = rbtversion->salt_length;
7250                 if (iterations != NULL)
7251                         *iterations = rbtversion->iterations;
7252                 if (flags != NULL)
7253                         *flags = rbtversion->flags;
7254                 result = ISC_R_SUCCESS;
7255         }
7256         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7257
7258         return (result);
7259 }
7260
7261 static isc_result_t
7262 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
7263         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7264         isc_stdtime_t oldresign;
7265         isc_result_t result = ISC_R_SUCCESS;
7266         rdatasetheader_t *header;
7267
7268         REQUIRE(VALID_RBTDB(rbtdb));
7269         REQUIRE(!IS_CACHE(rbtdb));
7270         REQUIRE(rdataset != NULL);
7271
7272         header = rdataset->private3;
7273         header--;
7274
7275         NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
7276                   isc_rwlocktype_write);
7277
7278         oldresign = header->resign;
7279         header->resign = resign;
7280         if (header->heap_index != 0) {
7281                 INSIST(RESIGN(header));
7282                 if (resign == 0) {
7283                         isc_heap_delete(rbtdb->heaps[header->node->locknum],
7284                                         header->heap_index);
7285                         header->heap_index = 0;
7286                 } else if (resign < oldresign)
7287                         isc_heap_increased(rbtdb->heaps[header->node->locknum],
7288                                            header->heap_index);
7289                 else if (resign > oldresign)
7290                         isc_heap_decreased(rbtdb->heaps[header->node->locknum],
7291                                            header->heap_index);
7292         } else if (resign && header->heap_index == 0) {
7293                 header->attributes |= RDATASET_ATTR_RESIGN;
7294                 result = resign_insert(rbtdb, header->node->locknum, header);
7295         }
7296         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
7297                     isc_rwlocktype_write);
7298         return (result);
7299 }
7300
7301 static isc_result_t
7302 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
7303                dns_name_t *foundname)
7304 {
7305         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7306         rdatasetheader_t *header = NULL, *this;
7307         unsigned int i;
7308         isc_result_t result = ISC_R_NOTFOUND;
7309         unsigned int locknum;
7310
7311         REQUIRE(VALID_RBTDB(rbtdb));
7312
7313         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7314
7315         for (i = 0; i < rbtdb->node_lock_count; i++) {
7316                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
7317                 this = isc_heap_element(rbtdb->heaps[i], 1);
7318                 if (this == NULL) {
7319                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
7320                                     isc_rwlocktype_read);
7321                         continue;
7322                 }
7323                 if (header == NULL)
7324                         header = this;
7325                 else if (isc_serial_lt(this->resign, header->resign)) {
7326                         locknum = header->node->locknum;
7327                         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
7328                                     isc_rwlocktype_read);
7329                         header = this;
7330                 } else
7331                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
7332                                     isc_rwlocktype_read);
7333         }
7334
7335         if (header == NULL)
7336                 goto unlock;
7337
7338         bind_rdataset(rbtdb, header->node, header, 0, rdataset);
7339
7340         if (foundname != NULL)
7341                 dns_rbt_fullnamefromnode(header->node, foundname);
7342
7343         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
7344                     isc_rwlocktype_read);
7345
7346         result = ISC_R_SUCCESS;
7347
7348  unlock:
7349         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7350
7351         return (result);
7352 }
7353
7354 static void
7355 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
7356 {
7357         rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
7358         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7359         dns_rbtnode_t *node;
7360         rdatasetheader_t *header;
7361
7362         REQUIRE(VALID_RBTDB(rbtdb));
7363         REQUIRE(rdataset != NULL);
7364         REQUIRE(rdataset->methods == &rdataset_methods);
7365         REQUIRE(rbtdb->future_version == rbtversion);
7366         REQUIRE(rbtversion != NULL);
7367         REQUIRE(rbtversion->writer);
7368         REQUIRE(rbtversion->rbtdb == rbtdb);
7369
7370         node = rdataset->private2;
7371         INSIST(node != NULL);
7372         header = rdataset->private3;
7373         INSIST(header != NULL);
7374         header--;
7375
7376         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7377         NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
7378                   isc_rwlocktype_write);
7379         /*
7380          * Delete from heap and save to re-signed list so that it can
7381          * be restored if we backout of this change.
7382          */
7383         new_reference(rbtdb, node);
7384         isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
7385         header->heap_index = 0;
7386         ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
7387
7388         NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
7389                     isc_rwlocktype_write);
7390         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7391 }
7392
7393 static dns_stats_t *
7394 getrrsetstats(dns_db_t *db) {
7395         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7396
7397         REQUIRE(VALID_RBTDB(rbtdb));
7398         REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
7399
7400         return (rbtdb->rrsetstats);
7401 }
7402
/*
 * Method table installed by the create routine for every database type
 * other than dns_dbtype_cache (i.e. zone and stub databases).
 *
 * The initializer is positional, so the order of the entries must match
 * the member order of dns_dbmethods_t.  It differs from cache_methods in
 * the find/findzonecut/findrdataset entries, in supplying the NSEC3 and
 * re-signing methods, and (in a BIND9 build) the two response-policy
 * methods.
 */
static dns_dbmethods_t zone_methods = {
        attach,
        detach,
        beginload,
        endload,
        dump,
        currentversion,
        newversion,
        attachversion,
        closeversion,
        findnode,
        zone_find,
        zone_findzonecut,
        attachnode,
        detachnode,
        expirenode,
        printnode,
        createiterator,
        zone_findrdataset,
        allrdatasets,
        addrdataset,
        subtractrdataset,
        deleterdataset,
        issecure,
        nodecount,
        ispersistent,
        overmem,
        settask,
        getoriginnode,
        NULL,                   /* not provided; presumably the transfernode
                                 * slot -- confirm against dns/db.h */
        getnsec3parameters,
        findnsec3node,
        setsigningtime,
        getsigningtime,
        resigned,
        isdnssec,
        NULL,                   /* slot that holds getrrsetstats in
                                 * cache_methods; not provided for zones */
#ifdef BIND9
        get_rpz_enabled,
        rpz_findips
#else
        NULL,
        NULL
#endif
};
7448
/*
 * Method table installed by the create routine when the database type is
 * dns_dbtype_cache.
 *
 * The initializer is positional, so the order of the entries must match
 * the member order of dns_dbmethods_t.  Compared with zone_methods it uses
 * the cache_* find routines, supplies getrrsetstats, and leaves the NSEC3,
 * re-signing and response-policy entries NULL.
 */
static dns_dbmethods_t cache_methods = {
        attach,
        detach,
        beginload,
        endload,
        dump,
        currentversion,
        newversion,
        attachversion,
        closeversion,
        findnode,
        cache_find,
        cache_findzonecut,
        attachnode,
        detachnode,
        expirenode,
        printnode,
        createiterator,
        cache_findrdataset,
        allrdatasets,
        addrdataset,
        subtractrdataset,
        deleterdataset,
        issecure,
        nodecount,
        ispersistent,
        overmem,
        settask,
        getoriginnode,
        NULL,                   /* not provided; presumably the transfernode
                                 * slot -- confirm against dns/db.h */
        NULL,                   /* getnsec3parameters slot in zone_methods */
        NULL,                   /* findnsec3node slot in zone_methods */
        NULL,                   /* setsigningtime slot in zone_methods */
        NULL,                   /* getsigningtime slot in zone_methods */
        NULL,                   /* resigned slot in zone_methods */
        isdnssec,
        getrrsetstats,
        NULL,                   /* get_rpz_enabled slot in zone_methods */
        NULL                    /* rpz_findips slot in zone_methods */
};
7489
7490 isc_result_t
7491 #ifdef DNS_RBTDB_VERSION64
7492 dns_rbtdb64_create
7493 #else
7494 dns_rbtdb_create
7495 #endif
7496                 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
7497                  dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
7498                  void *driverarg, dns_db_t **dbp)
7499 {
7500         dns_rbtdb_t *rbtdb;
7501         isc_result_t result;
7502         int i;
7503         dns_name_t name;
7504         isc_boolean_t (*sooner)(void *, void *);
7505         isc_mem_t *hmctx = mctx;
7506
7507         /* Keep the compiler happy. */
7508         UNUSED(driverarg);
7509
7510         rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
7511         if (rbtdb == NULL)
7512                 return (ISC_R_NOMEMORY);
7513
7514         /*
7515          * If argv[0] exists, it points to a memory context to use for heap
7516          */
7517         if (argc != 0)
7518                 hmctx = (isc_mem_t *) argv[0];
7519
7520         memset(rbtdb, '\0', sizeof(*rbtdb));
7521         dns_name_init(&rbtdb->common.origin, NULL);
7522         rbtdb->common.attributes = 0;
7523         if (type == dns_dbtype_cache) {
7524                 rbtdb->common.methods = &cache_methods;
7525                 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
7526         } else if (type == dns_dbtype_stub) {
7527                 rbtdb->common.methods = &zone_methods;
7528                 rbtdb->common.attributes |= DNS_DBATTR_STUB;
7529         } else
7530                 rbtdb->common.methods = &zone_methods;
7531         rbtdb->common.rdclass = rdclass;
7532         rbtdb->common.mctx = NULL;
7533
7534         result = RBTDB_INITLOCK(&rbtdb->lock);
7535         if (result != ISC_R_SUCCESS)
7536                 goto cleanup_rbtdb;
7537
7538         result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
7539         if (result != ISC_R_SUCCESS)
7540                 goto cleanup_lock;
7541
7542         /*
7543          * Initialize node_lock_count in a generic way to support future
7544          * extension which allows the user to specify this value on creation.
7545          * Note that when specified for a cache DB it must be larger than 1
7546          * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
7547          */
7548         if (rbtdb->node_lock_count == 0) {
7549                 if (IS_CACHE(rbtdb))
7550                         rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
7551                 else
7552                         rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
7553         } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
7554                 result = ISC_R_RANGE;
7555                 goto cleanup_tree_lock;
7556         }
7557         INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
7558         rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
7559                                         sizeof(rbtdb_nodelock_t));
7560         if (rbtdb->node_locks == NULL) {
7561                 result = ISC_R_NOMEMORY;
7562                 goto cleanup_tree_lock;
7563         }
7564
7565         rbtdb->rrsetstats = NULL;
7566         if (IS_CACHE(rbtdb)) {
7567                 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
7568                 if (result != ISC_R_SUCCESS)
7569                         goto cleanup_node_locks;
7570                 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
7571                                                sizeof(rdatasetheaderlist_t));
7572                 if (rbtdb->rdatasets == NULL) {
7573                         result = ISC_R_NOMEMORY;
7574                         goto cleanup_rrsetstats;
7575                 }
7576                 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7577                         ISC_LIST_INIT(rbtdb->rdatasets[i]);
7578         } else
7579                 rbtdb->rdatasets = NULL;
7580
7581         /*
7582          * Create the heaps.
7583          */
7584         rbtdb->heaps = isc_mem_get(hmctx, rbtdb->node_lock_count *
7585                                    sizeof(isc_heap_t *));
7586         if (rbtdb->heaps == NULL) {
7587                 result = ISC_R_NOMEMORY;
7588                 goto cleanup_rdatasets;
7589         }
7590         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7591                 rbtdb->heaps[i] = NULL;
7592         sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
7593         for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
7594                 result = isc_heap_create(hmctx, sooner, set_index, 0,
7595                                          &rbtdb->heaps[i]);
7596                 if (result != ISC_R_SUCCESS)
7597                         goto cleanup_heaps;
7598         }
7599
7600         /*
7601          * Create deadnode lists.
7602          */
7603         rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
7604                                        sizeof(rbtnodelist_t));
7605         if (rbtdb->deadnodes == NULL) {
7606                 result = ISC_R_NOMEMORY;
7607                 goto cleanup_heaps;
7608         }
7609         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7610                 ISC_LIST_INIT(rbtdb->deadnodes[i]);
7611
7612         rbtdb->active = rbtdb->node_lock_count;
7613
7614         for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
7615                 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
7616                 if (result == ISC_R_SUCCESS) {
7617                         result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
7618                         if (result != ISC_R_SUCCESS)
7619                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7620                 }
7621                 if (result != ISC_R_SUCCESS) {
7622                         while (i-- > 0) {
7623                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7624                                 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7625                                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7626                         }
7627                         goto cleanup_deadnodes;
7628                 }
7629                 rbtdb->node_locks[i].exiting = ISC_FALSE;
7630         }
7631
7632         /*
7633          * Attach to the mctx.  The database will persist so long as there
7634          * are references to it, and attaching to the mctx ensures that our
7635          * mctx won't disappear out from under us.
7636          */
7637         isc_mem_attach(mctx, &rbtdb->common.mctx);
7638         isc_mem_attach(hmctx, &rbtdb->hmctx);
7639
7640         /*
7641          * Must be initialized before free_rbtdb() is called.
7642          */
7643         isc_ondestroy_init(&rbtdb->common.ondest);
7644
7645         /*
7646          * Make a copy of the origin name.
7647          */
7648         result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7649         if (result != ISC_R_SUCCESS) {
7650                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7651                 return (result);
7652         }
7653
7654         /*
7655          * Make the Red-Black Trees.
7656          */
7657         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7658         if (result != ISC_R_SUCCESS) {
7659                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7660                 return (result);
7661         }
7662
7663         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec);
7664         if (result != ISC_R_SUCCESS) {
7665                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7666                 return (result);
7667         }
7668
7669         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7670         if (result != ISC_R_SUCCESS) {
7671                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7672                 return (result);
7673         }
7674
7675 #ifdef BIND9
7676         /*
7677          * Get ready for response policy IP address searching if at least one
7678          * zone has been configured as a response policy zone and this
7679          * is not a cache zone.
7680          * It would be better to know that this database is for a policy
7681          * zone named for a view, but that would require knowledge from
7682          * above such as an argv[] set from data in the zone.
7683          */
7684         if (type == dns_dbtype_zone && !dns_name_equal(origin, dns_rootname)) {
7685                 result = dns_rpz_new_cidr(mctx, origin, &rbtdb->rpz_cidr);
7686                 if (result != ISC_R_SUCCESS) {
7687                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7688                         return (result);
7689                 }
7690         }
7691 #endif
7692
7693         /*
7694          * In order to set the node callback bit correctly in zone databases,
7695          * we need to know if the node has the origin name of the zone.
7696          * In loading_addrdataset() we could simply compare the new name
7697          * to the origin name, but this is expensive.  Also, we don't know the
7698          * node name in addrdataset(), so we need another way of knowing the
7699          * zone's top.
7700          *
7701          * We now explicitly create a node for the zone's origin, and then
7702          * we simply remember the node's address.  This is safe, because
7703          * the top-of-zone node can never be deleted, nor can its address
7704          * change.
7705          */
7706         if (!IS_CACHE(rbtdb)) {
7707                 dns_rbtnode_t *nsec3node;
7708
7709                 rbtdb->origin_node = NULL;
7710                 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7711                                          &rbtdb->origin_node);
7712                 if (result != ISC_R_SUCCESS) {
7713                         INSIST(result != ISC_R_EXISTS);
7714                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7715                         return (result);
7716                 }
7717                 rbtdb->origin_node->nsec = DNS_RBT_NSEC_NORMAL;
7718                 /*
7719                  * We need to give the origin node the right locknum.
7720                  */
7721                 dns_name_init(&name, NULL);
7722                 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7723 #ifdef DNS_RBT_USEHASH
7724                 rbtdb->origin_node->locknum =
7725                         rbtdb->origin_node->hashval %
7726                         rbtdb->node_lock_count;
7727 #else
7728                 rbtdb->origin_node->locknum =
7729                         dns_name_hash(&name, ISC_TRUE) %
7730                         rbtdb->node_lock_count;
7731 #endif
7732                 /*
7733                  * Add an apex node to the NSEC3 tree so that NSEC3 searches
7734                  * return partial matches when there is only a single NSEC3
7735                  * record in the tree.
7736                  */
7737                 nsec3node = NULL;
7738                 result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin,
7739                                          &nsec3node);
7740                 if (result != ISC_R_SUCCESS) {
7741                         INSIST(result != ISC_R_EXISTS);
7742                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7743                         return (result);
7744                 }
7745                 nsec3node->nsec = DNS_RBT_NSEC_NSEC3;
7746                 /*
7747                  * We need to give the nsec3 origin node the right locknum.
7748                  */
7749                 dns_name_init(&name, NULL);
7750                 dns_rbt_namefromnode(nsec3node, &name);
7751 #ifdef DNS_RBT_USEHASH
7752                 nsec3node->locknum = nsec3node->hashval %
7753                         rbtdb->node_lock_count;
7754 #else
7755                 nsec3node->locknum = dns_name_hash(&name, ISC_TRUE) %
7756                         rbtdb->node_lock_count;
7757 #endif
7758         }
7759
7760         /*
7761          * Misc. Initialization.
7762          */
7763         result = isc_refcount_init(&rbtdb->references, 1);
7764         if (result != ISC_R_SUCCESS) {
7765                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7766                 return (result);
7767         }
7768         rbtdb->attributes = 0;
7769         rbtdb->task = NULL;
7770
7771         /*
7772          * Version Initialization.
7773          */
7774         rbtdb->current_serial = 1;
7775         rbtdb->least_serial = 1;
7776         rbtdb->next_serial = 2;
7777         rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7778         if (rbtdb->current_version == NULL) {
7779                 isc_refcount_decrement(&rbtdb->references, NULL);
7780                 isc_refcount_destroy(&rbtdb->references);
7781                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7782                 return (ISC_R_NOMEMORY);
7783         }
7784         rbtdb->current_version->rbtdb = rbtdb;
7785         rbtdb->current_version->secure = dns_db_insecure;
7786         rbtdb->current_version->havensec3 = ISC_FALSE;
7787         rbtdb->current_version->flags = 0;
7788         rbtdb->current_version->iterations = 0;
7789         rbtdb->current_version->hash = 0;
7790         rbtdb->current_version->salt_length = 0;
7791         memset(rbtdb->current_version->salt, 0,
7792                sizeof(rbtdb->current_version->salt));
7793         rbtdb->future_version = NULL;
7794         ISC_LIST_INIT(rbtdb->open_versions);
7795         /*
7796          * Keep the current version in the open list so that list operation
7797          * won't happen in normal lookup operations.
7798          */
7799         PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7800
7801         rbtdb->common.magic = DNS_DB_MAGIC;
7802         rbtdb->common.impmagic = RBTDB_MAGIC;
7803
7804         *dbp = (dns_db_t *)rbtdb;
7805
7806         return (ISC_R_SUCCESS);
7807
7808  cleanup_deadnodes:
7809         isc_mem_put(mctx, rbtdb->deadnodes,
7810                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7811
7812  cleanup_heaps:
7813         if (rbtdb->heaps != NULL) {
7814                 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7815                         if (rbtdb->heaps[i] != NULL)
7816                                 isc_heap_destroy(&rbtdb->heaps[i]);
7817                 isc_mem_put(hmctx, rbtdb->heaps,
7818                             rbtdb->node_lock_count * sizeof(isc_heap_t *));
7819         }
7820
7821  cleanup_rdatasets:
7822         if (rbtdb->rdatasets != NULL)
7823                 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7824                             sizeof(rdatasetheaderlist_t));
7825  cleanup_rrsetstats:
7826         if (rbtdb->rrsetstats != NULL)
7827                 dns_stats_detach(&rbtdb->rrsetstats);
7828
7829  cleanup_node_locks:
7830         isc_mem_put(mctx, rbtdb->node_locks,
7831                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7832
7833  cleanup_tree_lock:
7834         isc_rwlock_destroy(&rbtdb->tree_lock);
7835
7836  cleanup_lock:
7837         RBTDB_DESTROYLOCK(&rbtdb->lock);
7838
7839  cleanup_rbtdb:
7840         isc_mem_put(mctx, rbtdb,  sizeof(*rbtdb));
7841         return (result);
7842 }
7843
7844
7845 /*
7846  * Slabbed Rdataset Methods
7847  */
7848
7849 static void
7850 rdataset_disassociate(dns_rdataset_t *rdataset) {
7851         dns_db_t *db = rdataset->private1;
7852         dns_dbnode_t *node = rdataset->private2;
7853
7854         detachnode(db, &node);
7855 }
7856
7857 static isc_result_t
7858 rdataset_first(dns_rdataset_t *rdataset) {
7859         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7860         unsigned int count;
7861
7862         count = raw[0] * 256 + raw[1];
7863         if (count == 0) {
7864                 rdataset->private5 = NULL;
7865                 return (ISC_R_NOMORE);
7866         }
7867
7868 #if DNS_RDATASET_FIXED
7869         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7870                 raw += 2 + (4 * count);
7871         else
7872 #endif
7873                 raw += 2;
7874
7875         /*
7876          * The privateuint4 field is the number of rdata beyond the
7877          * cursor position, so we decrement the total count by one
7878          * before storing it.
7879          *
7880          * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7881          * first record.  If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7882          * to the first entry in the offset table.
7883          */
7884         count--;
7885         rdataset->privateuint4 = count;
7886         rdataset->private5 = raw;
7887
7888         return (ISC_R_SUCCESS);
7889 }
7890
7891 static isc_result_t
7892 rdataset_next(dns_rdataset_t *rdataset) {
7893         unsigned int count;
7894         unsigned int length;
7895         unsigned char *raw;     /* RDATASLAB */
7896
7897         count = rdataset->privateuint4;
7898         if (count == 0)
7899                 return (ISC_R_NOMORE);
7900         count--;
7901         rdataset->privateuint4 = count;
7902
7903         /*
7904          * Skip forward one record (length + 4) or one offset (4).
7905          */
7906         raw = rdataset->private5;
7907 #if DNS_RDATASET_FIXED
7908         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7909 #endif
7910                 length = raw[0] * 256 + raw[1];
7911                 raw += length;
7912 #if DNS_RDATASET_FIXED
7913         }
7914         rdataset->private5 = raw + 4;           /* length(2) + order(2) */
7915 #else
7916         rdataset->private5 = raw + 2;           /* length(2) */
7917 #endif
7918
7919         return (ISC_R_SUCCESS);
7920 }
7921
7922 static void
7923 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7924         unsigned char *raw = rdataset->private5;        /* RDATASLAB */
7925 #if DNS_RDATASET_FIXED
7926         unsigned int offset;
7927 #endif
7928         unsigned int length;
7929         isc_region_t r;
7930         unsigned int flags = 0;
7931
7932         REQUIRE(raw != NULL);
7933
7934         /*
7935          * Find the start of the record if not already in private5
7936          * then skip the length and order fields.
7937          */
7938 #if DNS_RDATASET_FIXED
7939         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7940                 offset = (raw[0] << 24) + (raw[1] << 16) +
7941                          (raw[2] << 8) + raw[3];
7942                 raw = rdataset->private3;
7943                 raw += offset;
7944         }
7945 #endif
7946         length = raw[0] * 256 + raw[1];
7947 #if DNS_RDATASET_FIXED
7948         raw += 4;
7949 #else
7950         raw += 2;
7951 #endif
7952         if (rdataset->type == dns_rdatatype_rrsig) {
7953                 if (*raw & DNS_RDATASLAB_OFFLINE)
7954                         flags |= DNS_RDATA_OFFLINE;
7955                 length--;
7956                 raw++;
7957         }
7958         r.length = length;
7959         r.base = raw;
7960         dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7961         rdata->flags |= flags;
7962 }
7963
7964 static void
7965 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7966         dns_db_t *db = source->private1;
7967         dns_dbnode_t *node = source->private2;
7968         dns_dbnode_t *cloned_node = NULL;
7969
7970         attachnode(db, node, &cloned_node);
7971         *target = *source;
7972
7973         /*
7974          * Reset iterator state.
7975          */
7976         target->privateuint4 = 0;
7977         target->private5 = NULL;
7978 }
7979
7980 static unsigned int
7981 rdataset_count(dns_rdataset_t *rdataset) {
7982         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7983         unsigned int count;
7984
7985         count = raw[0] * 256 + raw[1];
7986
7987         return (count);
7988 }
7989
7990 static isc_result_t
7991 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7992                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7993 {
7994         dns_db_t *db = rdataset->private1;
7995         dns_dbnode_t *node = rdataset->private2;
7996         dns_dbnode_t *cloned_node;
7997         struct noqname *noqname = rdataset->private6;
7998
7999         cloned_node = NULL;
8000         attachnode(db, node, &cloned_node);
8001         nsec->methods = &rdataset_methods;
8002         nsec->rdclass = db->rdclass;
8003         nsec->type = noqname->type;
8004         nsec->covers = 0;
8005         nsec->ttl = rdataset->ttl;
8006         nsec->trust = rdataset->trust;
8007         nsec->private1 = rdataset->private1;
8008         nsec->private2 = rdataset->private2;
8009         nsec->private3 = noqname->neg;
8010         nsec->privateuint4 = 0;
8011         nsec->private5 = NULL;
8012         nsec->private6 = NULL;
8013         nsec->private7 = NULL;
8014
8015         cloned_node = NULL;
8016         attachnode(db, node, &cloned_node);
8017         nsecsig->methods = &rdataset_methods;
8018         nsecsig->rdclass = db->rdclass;
8019         nsecsig->type = dns_rdatatype_rrsig;
8020         nsecsig->covers = noqname->type;
8021         nsecsig->ttl = rdataset->ttl;
8022         nsecsig->trust = rdataset->trust;
8023         nsecsig->private1 = rdataset->private1;
8024         nsecsig->private2 = rdataset->private2;
8025         nsecsig->private3 = noqname->negsig;
8026         nsecsig->privateuint4 = 0;
8027         nsecsig->private5 = NULL;
8028         nsec->private6 = NULL;
8029         nsec->private7 = NULL;
8030
8031         dns_name_clone(&noqname->name, name);
8032
8033         return (ISC_R_SUCCESS);
8034 }
8035
8036 static isc_result_t
8037 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
8038                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
8039 {
8040         dns_db_t *db = rdataset->private1;
8041         dns_dbnode_t *node = rdataset->private2;
8042         dns_dbnode_t *cloned_node;
8043         struct noqname *closest = rdataset->private7;
8044
8045         cloned_node = NULL;
8046         attachnode(db, node, &cloned_node);
8047         nsec->methods = &rdataset_methods;
8048         nsec->rdclass = db->rdclass;
8049         nsec->type = closest->type;
8050         nsec->covers = 0;
8051         nsec->ttl = rdataset->ttl;
8052         nsec->trust = rdataset->trust;
8053         nsec->private1 = rdataset->private1;
8054         nsec->private2 = rdataset->private2;
8055         nsec->private3 = closest->neg;
8056         nsec->privateuint4 = 0;
8057         nsec->private5 = NULL;
8058         nsec->private6 = NULL;
8059         nsec->private7 = NULL;
8060
8061         cloned_node = NULL;
8062         attachnode(db, node, &cloned_node);
8063         nsecsig->methods = &rdataset_methods;
8064         nsecsig->rdclass = db->rdclass;
8065         nsecsig->type = dns_rdatatype_rrsig;
8066         nsecsig->covers = closest->type;
8067         nsecsig->ttl = rdataset->ttl;
8068         nsecsig->trust = rdataset->trust;
8069         nsecsig->private1 = rdataset->private1;
8070         nsecsig->private2 = rdataset->private2;
8071         nsecsig->private3 = closest->negsig;
8072         nsecsig->privateuint4 = 0;
8073         nsecsig->private5 = NULL;
8074         nsec->private6 = NULL;
8075         nsec->private7 = NULL;
8076
8077         dns_name_clone(&closest->name, name);
8078
8079         return (ISC_R_SUCCESS);
8080 }
8081
8082 static void
8083 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
8084         dns_rbtdb_t *rbtdb = rdataset->private1;
8085         dns_rbtnode_t *rbtnode = rdataset->private2;
8086         rdatasetheader_t *header = rdataset->private3;
8087
8088         header--;
8089         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8090                   isc_rwlocktype_write);
8091         header->trust = rdataset->trust = trust;
8092         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8093                   isc_rwlocktype_write);
8094 }
8095
8096 static void
8097 rdataset_expire(dns_rdataset_t *rdataset) {
8098         dns_rbtdb_t *rbtdb = rdataset->private1;
8099         dns_rbtnode_t *rbtnode = rdataset->private2;
8100         rdatasetheader_t *header = rdataset->private3;
8101
8102         header--;
8103         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8104                   isc_rwlocktype_write);
8105         expire_header(rbtdb, header, ISC_FALSE);
8106         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8107                   isc_rwlocktype_write);
8108 }
8109
8110 /*
8111  * Rdataset Iterator Methods
8112  */
8113
8114 static void
8115 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
8116         rbtdb_rdatasetiter_t *rbtiterator;
8117
8118         rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
8119
8120         if (rbtiterator->common.version != NULL)
8121                 closeversion(rbtiterator->common.db,
8122                              &rbtiterator->common.version, ISC_FALSE);
8123         detachnode(rbtiterator->common.db, &rbtiterator->common.node);
8124         isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
8125                     sizeof(*rbtiterator));
8126
8127         *iteratorp = NULL;
8128 }
8129
8130 static isc_result_t
8131 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
8132         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8133         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8134         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8135         rbtdb_version_t *rbtversion = rbtiterator->common.version;
8136         rdatasetheader_t *header, *top_next;
8137         rbtdb_serial_t serial;
8138         isc_stdtime_t now;
8139
8140         if (IS_CACHE(rbtdb)) {
8141                 serial = 1;
8142                 now = rbtiterator->common.now;
8143         } else {
8144                 serial = rbtversion->serial;
8145                 now = 0;
8146         }
8147
8148         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8149                   isc_rwlocktype_read);
8150
8151         for (header = rbtnode->data; header != NULL; header = top_next) {
8152                 top_next = header->next;
8153                 do {
8154                         if (header->serial <= serial && !IGNORE(header)) {
8155                                 /*
8156                                  * Is this a "this rdataset doesn't exist"
8157                                  * record?  Or is it too old in the cache?
8158                                  *
8159                                  * Note: unlike everywhere else, we
8160                                  * check for now > header->rdh_ttl instead
8161                                  * of now >= header->rdh_ttl.  This allows
8162                                  * ANY and RRSIG queries for 0 TTL
8163                                  * rdatasets to work.
8164                                  */
8165                                 if (NONEXISTENT(header) ||
8166                                     (now != 0 && now > header->rdh_ttl))
8167                                         header = NULL;
8168                                 break;
8169                         } else
8170                                 header = header->down;
8171                 } while (header != NULL);
8172                 if (header != NULL)
8173                         break;
8174         }
8175
8176         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8177                     isc_rwlocktype_read);
8178
8179         rbtiterator->current = header;
8180
8181         if (header == NULL)
8182                 return (ISC_R_NOMORE);
8183
8184         return (ISC_R_SUCCESS);
8185 }
8186
8187 static isc_result_t
8188 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
8189         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8190         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8191         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8192         rbtdb_version_t *rbtversion = rbtiterator->common.version;
8193         rdatasetheader_t *header, *top_next;
8194         rbtdb_serial_t serial;
8195         isc_stdtime_t now;
8196         rbtdb_rdatatype_t type, negtype;
8197         dns_rdatatype_t rdtype, covers;
8198
8199         header = rbtiterator->current;
8200         if (header == NULL)
8201                 return (ISC_R_NOMORE);
8202
8203         if (IS_CACHE(rbtdb)) {
8204                 serial = 1;
8205                 now = rbtiterator->common.now;
8206         } else {
8207                 serial = rbtversion->serial;
8208                 now = 0;
8209         }
8210
8211         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8212                   isc_rwlocktype_read);
8213
8214         type = header->type;
8215         rdtype = RBTDB_RDATATYPE_BASE(header->type);
8216         if (NEGATIVE(header)) {
8217                 covers = RBTDB_RDATATYPE_EXT(header->type);
8218                 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
8219         } else
8220                 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
8221         for (header = header->next; header != NULL; header = top_next) {
8222                 top_next = header->next;
8223                 /*
8224                  * If not walking back up the down list.
8225                  */
8226                 if (header->type != type && header->type != negtype) {
8227                         do {
8228                                 if (header->serial <= serial &&
8229                                     !IGNORE(header)) {
8230                                         /*
8231                                          * Is this a "this rdataset doesn't
8232                                          * exist" record?
8233                                          *
8234                                          * Note: unlike everywhere else, we
8235                                          * check for now > header->ttl instead
8236                                          * of now >= header->ttl.  This allows
8237                                          * ANY and RRSIG queries for 0 TTL
8238                                          * rdatasets to work.
8239                                          */
8240                                         if ((header->attributes &
8241                                              RDATASET_ATTR_NONEXISTENT) != 0 ||
8242                                             (now != 0 && now > header->rdh_ttl))
8243                                                 header = NULL;
8244                                         break;
8245                                 } else
8246                                         header = header->down;
8247                         } while (header != NULL);
8248                         if (header != NULL)
8249                                 break;
8250                 }
8251         }
8252
8253         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8254                     isc_rwlocktype_read);
8255
8256         rbtiterator->current = header;
8257
8258         if (header == NULL)
8259                 return (ISC_R_NOMORE);
8260
8261         return (ISC_R_SUCCESS);
8262 }
8263
8264 static void
8265 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
8266         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8267         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8268         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8269         rdatasetheader_t *header;
8270
8271         header = rbtiterator->current;
8272         REQUIRE(header != NULL);
8273
8274         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8275                   isc_rwlocktype_read);
8276
8277         bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
8278                       rdataset);
8279
8280         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8281                     isc_rwlocktype_read);
8282 }
8283
8284
8285 /*
8286  * Database Iterator Methods
8287  */
8288
8289 static inline void
8290 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
8291         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8292         dns_rbtnode_t *node = rbtdbiter->node;
8293
8294         if (node == NULL)
8295                 return;
8296
8297         INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
8298         reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
8299 }
8300
8301 static inline void
8302 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
8303         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8304         dns_rbtnode_t *node = rbtdbiter->node;
8305         nodelock_t *lock;
8306
8307         if (node == NULL)
8308                 return;
8309
8310         lock = &rbtdb->node_locks[node->locknum].lock;
8311         NODE_LOCK(lock, isc_rwlocktype_read);
8312         decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
8313                             rbtdbiter->tree_locked, ISC_FALSE);
8314         NODE_UNLOCK(lock, isc_rwlocktype_read);
8315
8316         rbtdbiter->node = NULL;
8317 }
8318
8319 static void
8320 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
8321         dns_rbtnode_t *node;
8322         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8323         isc_boolean_t was_read_locked = ISC_FALSE;
8324         nodelock_t *lock;
8325         int i;
8326
8327         if (rbtdbiter->delete != 0) {
8328                 /*
8329                  * Note that "%d node of %d in tree" can report things like
8330                  * "flush_deletions: 59 nodes of 41 in tree".  This means
8331                  * That some nodes appear on the deletions list more than
8332                  * once.  Only the last occurence will actually be deleted.
8333                  */
8334                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
8335                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
8336                               "flush_deletions: %d nodes of %d in tree",
8337                               rbtdbiter->delete,
8338                               dns_rbt_nodecount(rbtdb->tree));
8339
8340                 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
8341                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8342                         was_read_locked = ISC_TRUE;
8343                 }
8344                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8345                 rbtdbiter->tree_locked = isc_rwlocktype_write;
8346
8347                 for (i = 0; i < rbtdbiter->delete; i++) {
8348                         node = rbtdbiter->deletions[i];
8349                         lock = &rbtdb->node_locks[node->locknum].lock;
8350
8351                         NODE_LOCK(lock, isc_rwlocktype_read);
8352                         decrement_reference(rbtdb, node, 0,
8353                                             isc_rwlocktype_read,
8354                                             rbtdbiter->tree_locked, ISC_FALSE);
8355                         NODE_UNLOCK(lock, isc_rwlocktype_read);
8356                 }
8357
8358                 rbtdbiter->delete = 0;
8359
8360                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8361                 if (was_read_locked) {
8362                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8363                         rbtdbiter->tree_locked = isc_rwlocktype_read;
8364
8365                 } else {
8366                         rbtdbiter->tree_locked = isc_rwlocktype_none;
8367                 }
8368         }
8369 }
8370
8371 static inline void
8372 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
8373         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8374
8375         REQUIRE(rbtdbiter->paused);
8376         REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
8377
8378         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8379         rbtdbiter->tree_locked = isc_rwlocktype_read;
8380
8381         rbtdbiter->paused = ISC_FALSE;
8382 }
8383
8384 static void
8385 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
8386         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
8387         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8388         dns_db_t *db = NULL;
8389
8390         if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
8391                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8392                 rbtdbiter->tree_locked = isc_rwlocktype_none;
8393         } else
8394                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
8395
8396         dereference_iter_node(rbtdbiter);
8397
8398         flush_deletions(rbtdbiter);
8399
8400         dns_db_attach(rbtdbiter->common.db, &db);
8401         dns_db_detach(&rbtdbiter->common.db);
8402
8403         dns_rbtnodechain_reset(&rbtdbiter->chain);
8404         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8405         isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
8406         dns_db_detach(&db);
8407
8408         *iteratorp = NULL;
8409 }
8410
8411 static isc_result_t
8412 dbiterator_first(dns_dbiterator_t *iterator) {
8413         isc_result_t result;
8414         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8415         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8416         dns_name_t *name, *origin;
8417
8418         if (rbtdbiter->result != ISC_R_SUCCESS &&
8419             rbtdbiter->result != ISC_R_NOMORE)
8420                 return (rbtdbiter->result);
8421
8422         if (rbtdbiter->paused)
8423                 resume_iteration(rbtdbiter);
8424
8425         dereference_iter_node(rbtdbiter);
8426
8427         name = dns_fixedname_name(&rbtdbiter->name);
8428         origin = dns_fixedname_name(&rbtdbiter->origin);
8429         dns_rbtnodechain_reset(&rbtdbiter->chain);
8430         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8431
8432         if (rbtdbiter->nsec3only) {
8433                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8434                 result = dns_rbtnodechain_first(rbtdbiter->current,
8435                                                 rbtdb->nsec3, name, origin);
8436         } else {
8437                 rbtdbiter->current = &rbtdbiter->chain;
8438                 result = dns_rbtnodechain_first(rbtdbiter->current,
8439                                                 rbtdb->tree, name, origin);
8440                 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
8441                         rbtdbiter->current = &rbtdbiter->nsec3chain;
8442                         result = dns_rbtnodechain_first(rbtdbiter->current,
8443                                                         rbtdb->nsec3, name,
8444                                                         origin);
8445                 }
8446         }
8447         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
8448                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8449                                                   NULL, &rbtdbiter->node);
8450                 if (result == ISC_R_SUCCESS) {
8451                         rbtdbiter->new_origin = ISC_TRUE;
8452                         reference_iter_node(rbtdbiter);
8453                 }
8454         } else {
8455                 INSIST(result == ISC_R_NOTFOUND);
8456                 result = ISC_R_NOMORE; /* The tree is empty. */
8457         }
8458
8459         rbtdbiter->result = result;
8460
8461         return (result);
8462 }
8463
8464 static isc_result_t
8465 dbiterator_last(dns_dbiterator_t *iterator) {
8466         isc_result_t result;
8467         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8468         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8469         dns_name_t *name, *origin;
8470
8471         if (rbtdbiter->result != ISC_R_SUCCESS &&
8472             rbtdbiter->result != ISC_R_NOMORE)
8473                 return (rbtdbiter->result);
8474
8475         if (rbtdbiter->paused)
8476                 resume_iteration(rbtdbiter);
8477
8478         dereference_iter_node(rbtdbiter);
8479
8480         name = dns_fixedname_name(&rbtdbiter->name);
8481         origin = dns_fixedname_name(&rbtdbiter->origin);
8482         dns_rbtnodechain_reset(&rbtdbiter->chain);
8483         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8484
8485         result = ISC_R_NOTFOUND;
8486         if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
8487                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8488                 result = dns_rbtnodechain_last(rbtdbiter->current,
8489                                                rbtdb->nsec3, name, origin);
8490         }
8491         if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
8492                 rbtdbiter->current = &rbtdbiter->chain;
8493                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8494                                                name, origin);
8495         }
8496         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
8497                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8498                                                   NULL, &rbtdbiter->node);
8499                 if (result == ISC_R_SUCCESS) {
8500                         rbtdbiter->new_origin = ISC_TRUE;
8501                         reference_iter_node(rbtdbiter);
8502                 }
8503         } else {
8504                 INSIST(result == ISC_R_NOTFOUND);
8505                 result = ISC_R_NOMORE; /* The tree is empty. */
8506         }
8507
8508         rbtdbiter->result = result;
8509
8510         return (result);
8511 }
8512
8513 static isc_result_t
8514 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
8515         isc_result_t result, tresult;
8516         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8517         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8518         dns_name_t *iname, *origin;
8519
8520         if (rbtdbiter->result != ISC_R_SUCCESS &&
8521             rbtdbiter->result != ISC_R_NOTFOUND &&
8522             rbtdbiter->result != ISC_R_NOMORE)
8523                 return (rbtdbiter->result);
8524
8525         if (rbtdbiter->paused)
8526                 resume_iteration(rbtdbiter);
8527
8528         dereference_iter_node(rbtdbiter);
8529
8530         iname = dns_fixedname_name(&rbtdbiter->name);
8531         origin = dns_fixedname_name(&rbtdbiter->origin);
8532         dns_rbtnodechain_reset(&rbtdbiter->chain);
8533         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8534
8535         if (rbtdbiter->nsec3only) {
8536                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8537                 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8538                                           &rbtdbiter->node,
8539                                           rbtdbiter->current,
8540                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8541         } else if (rbtdbiter->nonsec3) {
8542                 rbtdbiter->current = &rbtdbiter->chain;
8543                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8544                                           &rbtdbiter->node,
8545                                           rbtdbiter->current,
8546                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8547         } else {
8548                 /*
8549                  * Stay on main chain if not found on either chain.
8550                  */
8551                 rbtdbiter->current = &rbtdbiter->chain;
8552                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8553                                           &rbtdbiter->node,
8554                                           rbtdbiter->current,
8555                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8556                 if (result == DNS_R_PARTIALMATCH) {
8557                         dns_rbtnode_t *node = NULL;
8558                         tresult = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8559                                                   &node, &rbtdbiter->nsec3chain,
8560                                                   DNS_RBTFIND_EMPTYDATA,
8561                                                   NULL, NULL);
8562                         if (tresult == ISC_R_SUCCESS) {
8563                                 rbtdbiter->node = node;
8564                                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8565                                 result = tresult;
8566                         }
8567                 }
8568         }
8569
8570 #if 1
8571         if (result == ISC_R_SUCCESS) {
8572                 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
8573                                                   origin, NULL);
8574                 if (result == ISC_R_SUCCESS) {
8575                         rbtdbiter->new_origin = ISC_TRUE;
8576                         reference_iter_node(rbtdbiter);
8577                 }
8578         } else if (result == DNS_R_PARTIALMATCH) {
8579                 result = ISC_R_NOTFOUND;
8580                 rbtdbiter->node = NULL;
8581         }
8582
8583         rbtdbiter->result = result;
8584 #else
8585         if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
8586                 isc_result_t tresult;
8587                 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
8588                                                    origin, NULL);
8589                 if (tresult == ISC_R_SUCCESS) {
8590                         rbtdbiter->new_origin = ISC_TRUE;
8591                         reference_iter_node(rbtdbiter);
8592                 } else {
8593                         result = tresult;
8594                         rbtdbiter->node = NULL;
8595                 }
8596         } else
8597                 rbtdbiter->node = NULL;
8598
8599         rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
8600                             ISC_R_SUCCESS : result;
8601 #endif
8602
8603         return (result);
8604 }
8605
8606 static isc_result_t
8607 dbiterator_prev(dns_dbiterator_t *iterator) {
8608         isc_result_t result;
8609         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8610         dns_name_t *name, *origin;
8611         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8612
8613         REQUIRE(rbtdbiter->node != NULL);
8614
8615         if (rbtdbiter->result != ISC_R_SUCCESS)
8616                 return (rbtdbiter->result);
8617
8618         if (rbtdbiter->paused)
8619                 resume_iteration(rbtdbiter);
8620
8621         name = dns_fixedname_name(&rbtdbiter->name);
8622         origin = dns_fixedname_name(&rbtdbiter->origin);
8623         result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
8624         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8625             !rbtdbiter->nonsec3 &&
8626             &rbtdbiter->nsec3chain == rbtdbiter->current) {
8627                 rbtdbiter->current = &rbtdbiter->chain;
8628                 dns_rbtnodechain_reset(rbtdbiter->current);
8629                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8630                                                name, origin);
8631                 if (result == ISC_R_NOTFOUND)
8632                         result = ISC_R_NOMORE;
8633         }
8634
8635         dereference_iter_node(rbtdbiter);
8636
8637         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8638                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8639                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8640                                                   NULL, &rbtdbiter->node);
8641         }
8642
8643         if (result == ISC_R_SUCCESS)
8644                 reference_iter_node(rbtdbiter);
8645
8646         rbtdbiter->result = result;
8647
8648         return (result);
8649 }
8650
8651 static isc_result_t
8652 dbiterator_next(dns_dbiterator_t *iterator) {
8653         isc_result_t result;
8654         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8655         dns_name_t *name, *origin;
8656         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8657
8658         REQUIRE(rbtdbiter->node != NULL);
8659
8660         if (rbtdbiter->result != ISC_R_SUCCESS)
8661                 return (rbtdbiter->result);
8662
8663         if (rbtdbiter->paused)
8664                 resume_iteration(rbtdbiter);
8665
8666         name = dns_fixedname_name(&rbtdbiter->name);
8667         origin = dns_fixedname_name(&rbtdbiter->origin);
8668         result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
8669         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8670             !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
8671                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8672                 dns_rbtnodechain_reset(rbtdbiter->current);
8673                 result = dns_rbtnodechain_first(rbtdbiter->current,
8674                                                 rbtdb->nsec3, name, origin);
8675                 if (result == ISC_R_NOTFOUND)
8676                         result = ISC_R_NOMORE;
8677         }
8678
8679         dereference_iter_node(rbtdbiter);
8680
8681         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8682                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8683                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8684                                                   NULL, &rbtdbiter->node);
8685         }
8686         if (result == ISC_R_SUCCESS)
8687                 reference_iter_node(rbtdbiter);
8688
8689         rbtdbiter->result = result;
8690
8691         return (result);
8692 }
8693
8694 static isc_result_t
8695 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
8696                    dns_name_t *name)
8697 {
8698         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8699         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8700         dns_rbtnode_t *node = rbtdbiter->node;
8701         isc_result_t result;
8702         dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8703         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8704
8705         REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8706         REQUIRE(rbtdbiter->node != NULL);
8707
8708         if (rbtdbiter->paused)
8709                 resume_iteration(rbtdbiter);
8710
8711         if (name != NULL) {
8712                 if (rbtdbiter->common.relative_names)
8713                         origin = NULL;
8714                 result = dns_name_concatenate(nodename, origin, name, NULL);
8715                 if (result != ISC_R_SUCCESS)
8716                         return (result);
8717                 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8718                         result = DNS_R_NEWORIGIN;
8719         } else
8720                 result = ISC_R_SUCCESS;
8721
8722         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8723         new_reference(rbtdb, node);
8724         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8725
8726         *nodep = rbtdbiter->node;
8727
8728         if (iterator->cleaning && result == ISC_R_SUCCESS) {
8729                 isc_result_t expire_result;
8730
8731                 /*
8732                  * If the deletion array is full, flush it before trying
8733                  * to expire the current node.  The current node can't
8734                  * fully deleted while the iteration cursor is still on it.
8735                  */
8736                 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8737                         flush_deletions(rbtdbiter);
8738
8739                 expire_result = expirenode(iterator->db, *nodep, 0);
8740
8741                 /*
8742                  * expirenode() currently always returns success.
8743                  */
8744                 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8745                         unsigned int refs;
8746
8747                         rbtdbiter->deletions[rbtdbiter->delete++] = node;
8748                         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8749                         dns_rbtnode_refincrement(node, &refs);
8750                         INSIST(refs != 0);
8751                         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8752                 }
8753         }
8754
8755         return (result);
8756 }
8757
8758 static isc_result_t
8759 dbiterator_pause(dns_dbiterator_t *iterator) {
8760         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8761         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8762
8763         if (rbtdbiter->result != ISC_R_SUCCESS &&
8764             rbtdbiter->result != ISC_R_NOMORE)
8765                 return (rbtdbiter->result);
8766
8767         if (rbtdbiter->paused)
8768                 return (ISC_R_SUCCESS);
8769
8770         rbtdbiter->paused = ISC_TRUE;
8771
8772         if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8773                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8774                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8775                 rbtdbiter->tree_locked = isc_rwlocktype_none;
8776         }
8777
8778         flush_deletions(rbtdbiter);
8779
8780         return (ISC_R_SUCCESS);
8781 }
8782
8783 static isc_result_t
8784 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8785         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8786         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8787
8788         if (rbtdbiter->result != ISC_R_SUCCESS)
8789                 return (rbtdbiter->result);
8790
8791         return (dns_name_copy(origin, name, NULL));
8792 }
8793
8794 /*%
8795  * Additional cache routines.
8796  */
8797 static isc_result_t
8798 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8799                        dns_rdatatype_t qtype, dns_acache_t *acache,
8800                        dns_zone_t **zonep, dns_db_t **dbp,
8801                        dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8802                        dns_name_t *fname, dns_message_t *msg,
8803                        isc_stdtime_t now)
8804 {
8805 #ifndef BIND9
8806         UNUSED(rdataset);
8807         UNUSED(type);
8808         UNUSED(qtype);
8809         UNUSED(acache);
8810         UNUSED(zonep);
8811         UNUSED(dbp);
8812         UNUSED(versionp);
8813         UNUSED(nodep);
8814         UNUSED(fname);
8815         UNUSED(msg);
8816         UNUSED(now);
8817
8818         return (ISC_R_NOTIMPLEMENTED);
8819 #else
8820         dns_rbtdb_t *rbtdb = rdataset->private1;
8821         dns_rbtnode_t *rbtnode = rdataset->private2;
8822         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8823         unsigned int current_count = rdataset->privateuint4;
8824         unsigned int count;
8825         rdatasetheader_t *header;
8826         nodelock_t *nodelock;
8827         unsigned int total_count;
8828         acachectl_t *acarray;
8829         dns_acacheentry_t *entry;
8830         isc_result_t result;
8831
8832         UNUSED(qtype); /* we do not use this value at least for now */
8833         UNUSED(acache);
8834
8835         header = (struct rdatasetheader *)(raw - sizeof(*header));
8836
8837         total_count = raw[0] * 256 + raw[1];
8838         INSIST(total_count > current_count);
8839         count = total_count - current_count - 1;
8840
8841         acarray = NULL;
8842
8843         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8844         NODE_LOCK(nodelock, isc_rwlocktype_read);
8845
8846         switch (type) {
8847         case dns_rdatasetadditional_fromauth:
8848                 acarray = header->additional_auth;
8849                 break;
8850         case dns_rdatasetadditional_fromcache:
8851                 acarray = NULL;
8852                 break;
8853         case dns_rdatasetadditional_fromglue:
8854                 acarray = header->additional_glue;
8855                 break;
8856         default:
8857                 INSIST(0);
8858         }
8859
8860         if (acarray == NULL) {
8861                 if (type != dns_rdatasetadditional_fromcache)
8862                         dns_acache_countquerymiss(acache);
8863                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8864                 return (ISC_R_NOTFOUND);
8865         }
8866
8867         if (acarray[count].entry == NULL) {
8868                 dns_acache_countquerymiss(acache);
8869                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8870                 return (ISC_R_NOTFOUND);
8871         }
8872
8873         entry = NULL;
8874         dns_acache_attachentry(acarray[count].entry, &entry);
8875
8876         NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8877
8878         result = dns_acache_getentry(entry, zonep, dbp, versionp,
8879                                      nodep, fname, msg, now);
8880
8881         dns_acache_detachentry(&entry);
8882
8883         return (result);
8884 }
8885
8886 static void
8887 acache_callback(dns_acacheentry_t *entry, void **arg) {
8888         dns_rbtdb_t *rbtdb;
8889         dns_rbtnode_t *rbtnode;
8890         nodelock_t *nodelock;
8891         acachectl_t *acarray = NULL;
8892         acache_cbarg_t *cbarg;
8893         unsigned int count;
8894
8895         REQUIRE(arg != NULL);
8896         cbarg = *arg;
8897
8898         /*
8899          * The caller must hold the entry lock.
8900          */
8901
8902         rbtdb = (dns_rbtdb_t *)cbarg->db;
8903         rbtnode = (dns_rbtnode_t *)cbarg->node;
8904
8905         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8906         NODE_LOCK(nodelock, isc_rwlocktype_write);
8907
8908         switch (cbarg->type) {
8909         case dns_rdatasetadditional_fromauth:
8910                 acarray = cbarg->header->additional_auth;
8911                 break;
8912         case dns_rdatasetadditional_fromglue:
8913                 acarray = cbarg->header->additional_glue;
8914                 break;
8915         default:
8916                 INSIST(0);
8917         }
8918
8919         count = cbarg->count;
8920         if (acarray != NULL && acarray[count].entry == entry) {
8921                 acarray[count].entry = NULL;
8922                 INSIST(acarray[count].cbarg == cbarg);
8923                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8924                 acarray[count].cbarg = NULL;
8925         } else
8926                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8927
8928         dns_acache_detachentry(&entry);
8929
8930         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8931
8932         dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8933         dns_db_detach((dns_db_t **)(void*)&rbtdb);
8934
8935         *arg = NULL;
8936 #endif /* BIND9 */
8937 }
8938
#ifdef BIND9
/*%
 * Cancel an acache entry and dispose of its callback argument.
 *
 * The entry is cancelled, the node and database references held by
 * '*cbargp' are released, the argument structure is returned to 'mctx',
 * and '*cbargp' is set to NULL.  The entry itself is not detached here.
 */
static void
acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
                      acache_cbarg_t **cbargp)
{
        acache_cbarg_t *arg;

        REQUIRE(mctx != NULL);
        REQUIRE(entry != NULL);
        REQUIRE(cbargp != NULL && *cbargp != NULL);

        arg = *cbargp;

        dns_acache_cancelentry(entry);

        /* Drop the references the callback argument was holding. */
        dns_db_detachnode(arg->db, &arg->node);
        dns_db_detach(&arg->db);

        isc_mem_put(mctx, arg, sizeof(acache_cbarg_t));
        *cbargp = NULL;
}
#endif /* BIND9 */
8961
8962 static isc_result_t
8963 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8964                        dns_rdatatype_t qtype, dns_acache_t *acache,
8965                        dns_zone_t *zone, dns_db_t *db,
8966                        dns_dbversion_t *version, dns_dbnode_t *node,
8967                        dns_name_t *fname)
8968 {
8969 #ifndef BIND9
8970         UNUSED(rdataset);
8971         UNUSED(type);
8972         UNUSED(qtype);
8973         UNUSED(acache);
8974         UNUSED(zone);
8975         UNUSED(db);
8976         UNUSED(version);
8977         UNUSED(node);
8978         UNUSED(fname);
8979
8980         return (ISC_R_NOTIMPLEMENTED);
8981 #else
8982         dns_rbtdb_t *rbtdb = rdataset->private1;
8983         dns_rbtnode_t *rbtnode = rdataset->private2;
8984         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8985         unsigned int current_count = rdataset->privateuint4;
8986         rdatasetheader_t *header;
8987         unsigned int total_count, count;
8988         nodelock_t *nodelock;
8989         isc_result_t result;
8990         acachectl_t *acarray;
8991         dns_acacheentry_t *newentry, *oldentry = NULL;
8992         acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8993
8994         UNUSED(qtype);
8995
8996         if (type == dns_rdatasetadditional_fromcache)
8997                 return (ISC_R_SUCCESS);
8998
8999         header = (struct rdatasetheader *)(raw - sizeof(*header));
9000
9001         total_count = raw[0] * 256 + raw[1];
9002         INSIST(total_count > current_count);
9003         count = total_count - current_count - 1; /* should be private data */
9004
9005         newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
9006         if (newcbarg == NULL)
9007                 return (ISC_R_NOMEMORY);
9008         newcbarg->type = type;
9009         newcbarg->count = count;
9010         newcbarg->header = header;
9011         newcbarg->db = NULL;
9012         dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
9013         newcbarg->node = NULL;
9014         dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
9015                           &newcbarg->node);
9016         newentry = NULL;
9017         result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
9018                                         acache_callback, newcbarg, &newentry);
9019         if (result != ISC_R_SUCCESS)
9020                 goto fail;
9021         /* Set cache data in the new entry. */
9022         result = dns_acache_setentry(acache, newentry, zone, db,
9023                                      version, node, fname);
9024         if (result != ISC_R_SUCCESS)
9025                 goto fail;
9026
9027         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
9028         NODE_LOCK(nodelock, isc_rwlocktype_write);
9029
9030         acarray = NULL;
9031         switch (type) {
9032         case dns_rdatasetadditional_fromauth:
9033                 acarray = header->additional_auth;
9034                 break;
9035         case dns_rdatasetadditional_fromglue:
9036                 acarray = header->additional_glue;
9037                 break;
9038         default:
9039                 INSIST(0);
9040         }
9041
9042         if (acarray == NULL) {
9043                 unsigned int i;
9044
9045                 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
9046                                       sizeof(acachectl_t));
9047
9048                 if (acarray == NULL) {
9049                         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9050                         goto fail;
9051                 }
9052
9053                 for (i = 0; i < total_count; i++) {
9054                         acarray[i].entry = NULL;
9055                         acarray[i].cbarg = NULL;
9056                 }
9057         }
9058         switch (type) {
9059         case dns_rdatasetadditional_fromauth:
9060                 header->additional_auth = acarray;
9061                 break;
9062         case dns_rdatasetadditional_fromglue:
9063                 header->additional_glue = acarray;
9064                 break;
9065         default:
9066                 INSIST(0);
9067         }
9068
9069         if (acarray[count].entry != NULL) {
9070                 /*
9071                  * Swap the entry.  Delay cleaning-up the old entry since
9072                  * it would require a node lock.
9073                  */
9074                 oldentry = acarray[count].entry;
9075                 INSIST(acarray[count].cbarg != NULL);
9076                 oldcbarg = acarray[count].cbarg;
9077         }
9078         acarray[count].entry = newentry;
9079         acarray[count].cbarg = newcbarg;
9080
9081         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9082
9083         if (oldentry != NULL) {
9084                 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
9085                 dns_acache_detachentry(&oldentry);
9086         }
9087
9088         return (ISC_R_SUCCESS);
9089
9090  fail:
9091         if (newcbarg != NULL) {
9092                 if (newentry != NULL) {
9093                         acache_cancelentry(rbtdb->common.mctx, newentry,
9094                                            &newcbarg);
9095                         dns_acache_detachentry(&newentry);
9096                 } else {
9097                         dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
9098                         dns_db_detach(&newcbarg->db);
9099                         isc_mem_put(rbtdb->common.mctx, newcbarg,
9100                             sizeof(*newcbarg));
9101                 }
9102         }
9103
9104         return (result);
9105 #endif
9106 }
9107
9108 static isc_result_t
9109 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
9110                        dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
9111 {
9112 #ifndef BIND9
9113         UNUSED(acache);
9114         UNUSED(rdataset);
9115         UNUSED(type);
9116         UNUSED(qtype);
9117
9118         return (ISC_R_NOTIMPLEMENTED);
9119 #else
9120         dns_rbtdb_t *rbtdb = rdataset->private1;
9121         dns_rbtnode_t *rbtnode = rdataset->private2;
9122         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
9123         unsigned int current_count = rdataset->privateuint4;
9124         rdatasetheader_t *header;
9125         nodelock_t *nodelock;
9126         unsigned int total_count, count;
9127         acachectl_t *acarray;
9128         dns_acacheentry_t *entry;
9129         acache_cbarg_t *cbarg;
9130
9131         UNUSED(qtype);          /* we do not use this value at least for now */
9132         UNUSED(acache);
9133
9134         if (type == dns_rdatasetadditional_fromcache)
9135                 return (ISC_R_SUCCESS);
9136
9137         header = (struct rdatasetheader *)(raw - sizeof(*header));
9138
9139         total_count = raw[0] * 256 + raw[1];
9140         INSIST(total_count > current_count);
9141         count = total_count - current_count - 1;
9142
9143         acarray = NULL;
9144         entry = NULL;
9145
9146         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
9147         NODE_LOCK(nodelock, isc_rwlocktype_write);
9148
9149         switch (type) {
9150         case dns_rdatasetadditional_fromauth:
9151                 acarray = header->additional_auth;
9152                 break;
9153         case dns_rdatasetadditional_fromglue:
9154                 acarray = header->additional_glue;
9155                 break;
9156         default:
9157                 INSIST(0);
9158         }
9159
9160         if (acarray == NULL) {
9161                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9162                 return (ISC_R_NOTFOUND);
9163         }
9164
9165         entry = acarray[count].entry;
9166         if (entry == NULL) {
9167                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9168                 return (ISC_R_NOTFOUND);
9169         }
9170
9171         acarray[count].entry = NULL;
9172         cbarg = acarray[count].cbarg;
9173         acarray[count].cbarg = NULL;
9174
9175         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9176
9177         if (entry != NULL) {
9178                 if (cbarg != NULL)
9179                         acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
9180                 dns_acache_detachentry(&entry);
9181         }
9182
9183         return (ISC_R_SUCCESS);
9184 #endif
9185 }
9186
9187 /*%
9188  * Routines for LRU-based cache management.
9189  */
9190
9191 /*%
9192  * See if a given cache entry that is being reused needs to be updated
9193  * in the LRU-list.  From the LRU management point of view, this function is
9194  * expected to return true for almost all cases.  When used with threads,
9195  * however, this may cause a non-negligible performance penalty because a
9196  * writer lock will have to be acquired before updating the list.
9197  * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
9198  * function returns true if the entry has not been updated for some period of
9199  * time.  We differentiate the NS or glue address case and the others since
9200  * experiments have shown that the former tends to be accessed relatively
9201  * infrequently and the cost of cache miss is higher (e.g., a missing NS records
9202  * may cause external queries at a higher level zone, involving more
9203  * transactions).
9204  *
9205  * Caller must hold the node (read or write) lock.
9206  */
9207 static inline isc_boolean_t
9208 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
9209         if ((header->attributes &
9210              (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
9211                 return (ISC_FALSE);
9212
9213 #if DNS_RBTDB_LIMITLRUUPDATE
9214         if (header->type == dns_rdatatype_ns ||
9215             (header->trust == dns_trust_glue &&
9216              (header->type == dns_rdatatype_a ||
9217               header->type == dns_rdatatype_aaaa))) {
9218                 /*
9219                  * Glue records are updated if at least 60 seconds have passed
9220                  * since the previous update time.
9221                  */
9222                 return (header->last_used + 60 <= now);
9223         }
9224
9225         /* Other records are updated if 5 minutes have passed. */
9226         return (header->last_used + 300 <= now);
9227 #else
9228         UNUSED(now);
9229
9230         return (ISC_TRUE);
9231 #endif
9232 }
9233
9234 /*%
9235  * Update the timestamp of a given cache entry and move it to the head
9236  * of the corresponding LRU list.
9237  *
9238  * Caller must hold the node (write) lock.
9239  *
9240  * Note that the we do NOT touch the heap here, as the TTL has not changed.
9241  */
9242 static void
9243 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
9244               isc_stdtime_t now)
9245 {
9246         INSIST(IS_CACHE(rbtdb));
9247
9248         /* To be checked: can we really assume this? XXXMLG */
9249         INSIST(ISC_LINK_LINKED(header, link));
9250
9251         ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
9252         header->last_used = now;
9253         ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
9254 }
9255
9256 /*%
9257  * Purge some expired and/or stale (i.e. unused for some period) cache entries
9258  * under an overmem condition.  To recover from this condition quickly, up to
9259  * 2 entries will be purged.  This process is triggered while adding a new
9260  * entry, and we specifically avoid purging entries in the same LRU bucket as
9261  * the one to which the new entry will belong.  Otherwise, we might purge
9262  * entries of the same name of different RR types while adding RRsets from a
9263  * single response (consider the case where we're adding A and AAAA glue records
9264  * of the same NS name).
9265  */
9266 static void
9267 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
9268               isc_stdtime_t now, isc_boolean_t tree_locked)
9269 {
9270         rdatasetheader_t *header, *header_prev;
9271         unsigned int locknum;
9272         int purgecount = 2;
9273
9274         for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
9275              locknum != locknum_start && purgecount > 0;
9276              locknum = (locknum + 1) % rbtdb->node_lock_count) {
9277                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
9278                           isc_rwlocktype_write);
9279
9280                 header = isc_heap_element(rbtdb->heaps[locknum], 1);
9281                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
9282                         expire_header(rbtdb, header, tree_locked);
9283                         purgecount--;
9284                 }
9285
9286                 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
9287                      header != NULL && purgecount > 0;
9288                      header = header_prev) {
9289                         header_prev = ISC_LIST_PREV(header, link);
9290                         /*
9291                          * Unlink the entry at this point to avoid checking it
9292                          * again even if it's currently used someone else and
9293                          * cannot be purged at this moment.  This entry won't be
9294                          * referenced any more (so unlinking is safe) since the
9295                          * TTL was reset to 0.
9296                          */
9297                         ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
9298                                         link);
9299                         expire_header(rbtdb, header, tree_locked);
9300                         purgecount--;
9301                 }
9302
9303                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
9304                                     isc_rwlocktype_write);
9305         }
9306 }
9307
9308 static void
9309 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
9310               isc_boolean_t tree_locked)
9311 {
9312         set_ttl(rbtdb, header, 0);
9313         header->attributes |= RDATASET_ATTR_STALE;
9314         header->node->dirty = 1;
9315
9316         /*
9317          * Caller must hold the node (write) lock.
9318          */
9319
9320         if (dns_rbtnode_refcurrent(header->node) == 0) {
9321                 /*
9322                  * If no one else is using the node, we can clean it up now.
9323                  * We first need to gain a new reference to the node to meet a
9324                  * requirement of decrement_reference().
9325                  */
9326                 new_reference(rbtdb, header->node);
9327                 decrement_reference(rbtdb, header->node, 0,
9328                                     isc_rwlocktype_write,
9329                                     tree_locked ? isc_rwlocktype_write :
9330                                     isc_rwlocktype_none, ISC_FALSE);
9331         }
9332 }