]> CyberLeo.Net >> Repos - FreeBSD/releng/9.3.git/blob - contrib/bind9/lib/dns/rbtdb.c
Copy stable/9 to releng/9.3 as part of the 9.3-RELEASE cycle.
[FreeBSD/releng/9.3.git] / contrib / bind9 / lib / dns / rbtdb.c
1 /*
2  * Copyright (C) 2004-2014  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id$ */
19
20 /*! \file */
21
22 /*
23  * Principal Author: Bob Halley
24  */
25
26 #include <config.h>
27
28 /* #define inline */
29
30 #include <isc/event.h>
31 #include <isc/heap.h>
32 #include <isc/mem.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
41 #include <isc/task.h>
42 #include <isc/time.h>
43 #include <isc/util.h>
44
45 #include <dns/acache.h>
46 #include <dns/db.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
50 #include <dns/lib.h>
51 #include <dns/log.h>
52 #include <dns/masterdump.h>
53 #include <dns/nsec.h>
54 #include <dns/nsec3.h>
55 #include <dns/rbt.h>
56 #include <dns/rpz.h>
57 #include <dns/rdata.h>
58 #include <dns/rdataset.h>
59 #include <dns/rdatasetiter.h>
60 #include <dns/rdataslab.h>
61 #include <dns/rdatastruct.h>
62 #include <dns/result.h>
63 #include <dns/stats.h>
64 #include <dns/view.h>
65 #include <dns/zone.h>
66 #include <dns/zonekey.h>
67
68 #ifdef DNS_RBTDB_VERSION64
69 #include "rbtdb64.h"
70 #else
71 #include "rbtdb.h"
72 #endif
73
74 #ifdef DNS_RBTDB_VERSION64
75 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '8')
76 #else
77 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '4')
78 #endif
79
80 /*%
81  * Note that "impmagic" is not the first four bytes of the struct, so
82  * ISC_MAGIC_VALID cannot be used.
83  */
84 #define VALID_RBTDB(rbtdb)      ((rbtdb) != NULL && \
85                                  (rbtdb)->common.impmagic == RBTDB_MAGIC)
86
87 #ifdef DNS_RBTDB_VERSION64
88 typedef isc_uint64_t                    rbtdb_serial_t;
89 /*%
90  * Make casting easier in symbolic debuggers by using different names
91  * for the 64 bit version.
92  */
93 #define dns_rbtdb_t dns_rbtdb64_t
94 #define rdatasetheader_t rdatasetheader64_t
95 #define rbtdb_version_t rbtdb_version64_t
96 #else
97 typedef isc_uint32_t                    rbtdb_serial_t;
98 #endif
99
100 typedef isc_uint32_t                    rbtdb_rdatatype_t;
101
/*
 * An rbtdb_rdatatype_t carries two rdata types: the "base" type in its
 * low 16 bits and the "ext" type in the bits above them.
 *
 * RBTDB_RDATATYPE_VALUE() widens 'e' to rbtdb_rdatatype_t *before*
 * shifting.  Shifting first would happen in (signed) int after integer
 * promotion, which overflows for any 'e' with bit 15 set.
 */
#define RBTDB_RDATATYPE_BASE(type)      ((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)       ((dns_rdatatype_t)((type) >> 16))
#define RBTDB_RDATATYPE_VALUE(b, e) \
        ((rbtdb_rdatatype_t)(((rbtdb_rdatatype_t)(e) << 16) | (b)))
105
106 #define RBTDB_RDATATYPE_SIGNSEC \
107                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
108 #define RBTDB_RDATATYPE_SIGNSEC3 \
109                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
110 #define RBTDB_RDATATYPE_SIGNS \
111                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
112 #define RBTDB_RDATATYPE_SIGCNAME \
113                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
114 #define RBTDB_RDATATYPE_SIGDNAME \
115                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
116 #define RBTDB_RDATATYPE_SIGDDS \
117                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ds)
118 #define RBTDB_RDATATYPE_NCACHEANY \
119                 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
120
121 /*
122  * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
123  * Using rwlock is effective with regard to lookup performance only when
124  * it is implemented in an efficient way.
125  * Otherwise, it is generally wise to stick to the simple locking since rwlock
126  * would require more memory or can even make lookups slower due to its own
127  * overhead (when it internally calls mutex locks).
128  */
129 #ifdef ISC_RWLOCK_USEATOMIC
130 #define DNS_RBTDB_USERWLOCK 1
131 #else
132 #define DNS_RBTDB_USERWLOCK 0
133 #endif
134
135 #if DNS_RBTDB_USERWLOCK
136 #define RBTDB_INITLOCK(l)       isc_rwlock_init((l), 0, 0)
137 #define RBTDB_DESTROYLOCK(l)    isc_rwlock_destroy(l)
138 #define RBTDB_LOCK(l, t)        RWLOCK((l), (t))
139 #define RBTDB_UNLOCK(l, t)      RWUNLOCK((l), (t))
140 #else
141 #define RBTDB_INITLOCK(l)       isc_mutex_init(l)
142 #define RBTDB_DESTROYLOCK(l)    DESTROYLOCK(l)
143 #define RBTDB_LOCK(l, t)        LOCK(l)
144 #define RBTDB_UNLOCK(l, t)      UNLOCK(l)
145 #endif
146
147 /*
148  * Since node locking is sensitive to both performance and memory footprint,
149  * we need some trick here.  If we have both high-performance rwlock and
150  * high performance and small-memory reference counters, we use rwlock for
151  * node lock and isc_refcount for node references.  In this case, we don't have
152  * to protect the access to the counters by locks.
153  * Otherwise, we simply use ordinary mutex lock for node locking, and use
154  * simple integers as reference counters which is protected by the lock.
155  * In most cases, we can simply use wrapper macros such as NODE_LOCK and
156  * NODE_UNLOCK.  In some other cases, however, we need to protect reference
157  * counters first and then protect other parts of a node as read-only data.
158  * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
159  * provided for these special cases.  When we can use the efficient backend
160  * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
161  * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
162  * section including the access to the reference counter.
163  * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
164  * section is also protected by NODE_STRONGLOCK().
165  */
166 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
167 typedef isc_rwlock_t nodelock_t;
168
169 #define NODE_INITLOCK(l)        isc_rwlock_init((l), 0, 0)
170 #define NODE_DESTROYLOCK(l)     isc_rwlock_destroy(l)
171 #define NODE_LOCK(l, t)         RWLOCK((l), (t))
172 #define NODE_UNLOCK(l, t)       RWUNLOCK((l), (t))
173 #define NODE_TRYUPGRADE(l)      isc_rwlock_tryupgrade(l)
174
175 #define NODE_STRONGLOCK(l)      ((void)0)
176 #define NODE_STRONGUNLOCK(l)    ((void)0)
177 #define NODE_WEAKLOCK(l, t)     NODE_LOCK(l, t)
178 #define NODE_WEAKUNLOCK(l, t)   NODE_UNLOCK(l, t)
179 #define NODE_WEAKDOWNGRADE(l)   isc_rwlock_downgrade(l)
180 #else
181 typedef isc_mutex_t nodelock_t;
182
183 #define NODE_INITLOCK(l)        isc_mutex_init(l)
184 #define NODE_DESTROYLOCK(l)     DESTROYLOCK(l)
185 #define NODE_LOCK(l, t)         LOCK(l)
186 #define NODE_UNLOCK(l, t)       UNLOCK(l)
187 #define NODE_TRYUPGRADE(l)      ISC_R_SUCCESS
188
189 #define NODE_STRONGLOCK(l)      LOCK(l)
190 #define NODE_STRONGUNLOCK(l)    UNLOCK(l)
191 #define NODE_WEAKLOCK(l, t)     ((void)0)
192 #define NODE_WEAKUNLOCK(l, t)   ((void)0)
193 #define NODE_WEAKDOWNGRADE(l)   ((void)0)
194 #endif
195
196 /*%
197  * Whether to rate-limit updating the LRU to avoid possible thread contention.
198  * Our performance measurement has shown the cost is marginal, so it's defined
199  * to be 0 by default either with or without threads.
200  */
201 #ifndef DNS_RBTDB_LIMITLRUUPDATE
202 #define DNS_RBTDB_LIMITLRUUPDATE 0
203 #endif
204
205 /*
206  * Allow clients with a virtual time of up to 5 minutes in the past to see
207  * records that would otherwise have expired.
208  */
209 #define RBTDB_VIRTUAL 300
210
211 struct noqname {
212         dns_name_t      name;
213         void *          neg;
214         void *          negsig;
215         dns_rdatatype_t type;
216 };
217
218 typedef struct acachectl acachectl_t;
219
220 typedef struct rdatasetheader {
221         /*%
222          * Locked by the owning node's lock.
223          */
224         rbtdb_serial_t                  serial;
225         dns_ttl_t                       rdh_ttl;
226         rbtdb_rdatatype_t               type;
227         isc_uint16_t                    attributes;
228         dns_trust_t                     trust;
229         struct noqname                  *noqname;
230         struct noqname                  *closest;
231         /*%<
232          * We don't use the LIST macros, because the LIST structure has
233          * both head and tail pointers, and is doubly linked.
234          */
235
236         struct rdatasetheader           *next;
237         /*%<
238          * If this is the top header for an rdataset, 'next' points
239          * to the top header for the next rdataset (i.e., the next type).
240          * Otherwise, it points up to the header whose down pointer points
241          * at this header.
242          */
243
244         struct rdatasetheader           *down;
245         /*%<
246          * Points to the header for the next older version of
247          * this rdataset.
248          */
249
250         isc_uint32_t                    count;
251         /*%<
252          * Monotonously increased every time this rdataset is bound so that
253          * it is used as the base of the starting point in DNS responses
254          * when the "cyclic" rrset-order is required.  Since the ordering
255          * should not be so crucial, no lock is set for the counter for
256          * performance reasons.
257          */
258
259         acachectl_t                     *additional_auth;
260         acachectl_t                     *additional_glue;
261
262         dns_rbtnode_t                   *node;
263         isc_stdtime_t                   last_used;
264         ISC_LINK(struct rdatasetheader) link;
265
266         unsigned int                    heap_index;
267         /*%<
268          * Used for TTL-based cache cleaning.
269          */
270         isc_stdtime_t                   resign;
271 } rdatasetheader_t;
272
273 typedef ISC_LIST(rdatasetheader_t)      rdatasetheaderlist_t;
274 typedef ISC_LIST(dns_rbtnode_t)         rbtnodelist_t;
275
276 #define RDATASET_ATTR_NONEXISTENT       0x0001
277 #define RDATASET_ATTR_STALE             0x0002
278 #define RDATASET_ATTR_IGNORE            0x0004
279 #define RDATASET_ATTR_RETAIN            0x0008
280 #define RDATASET_ATTR_NXDOMAIN          0x0010
281 #define RDATASET_ATTR_RESIGN            0x0020
282 #define RDATASET_ATTR_STATCOUNT         0x0040
283 #define RDATASET_ATTR_OPTOUT            0x0080
284 #define RDATASET_ATTR_NEGATIVE          0x0100
285
286 typedef struct acache_cbarg {
287         dns_rdatasetadditional_t        type;
288         unsigned int                    count;
289         dns_db_t                        *db;
290         dns_dbnode_t                    *node;
291         rdatasetheader_t                *header;
292 } acache_cbarg_t;
293
294 struct acachectl {
295         dns_acacheentry_t               *entry;
296         acache_cbarg_t                  *cbarg;
297 };
298
299 /*
300  * XXX
301  * When the cache will pre-expire data (due to memory low or other
302  * situations) before the rdataset's TTL has expired, it MUST
303  * respect the RETAIN bit and not expire the data until its TTL is
304  * expired.
305  */
306
307 #undef IGNORE                   /* WIN32 winbase.h defines this. */
308
309 #define EXISTS(header) \
310         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
311 #define NONEXISTENT(header) \
312         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
313 #define IGNORE(header) \
314         (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
315 #define RETAIN(header) \
316         (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
317 #define NXDOMAIN(header) \
318         (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
319 #define RESIGN(header) \
320         (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
321 #define OPTOUT(header) \
322         (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
323 #define NEGATIVE(header) \
324         (((header)->attributes & RDATASET_ATTR_NEGATIVE) != 0)
325
326 #define DEFAULT_NODE_LOCK_COUNT         7       /*%< Should be prime. */
327
328 /*%
329  * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
330  * There is a tradeoff issue about configuring this value: if this is too
331  * small, it may cause heavier contention between threads; if this is too large,
332  * LRU purge algorithm won't work well (entries tend to be purged prematurely).
333  * The default value should work well for most environments, but this can
334  * also be configurable at compilation time via the
335  * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable.  This value must be larger than
336  * 1 due to the assumption of overmem_purge().
337  */
338 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
339 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
340 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
341 #else
342 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
343 #endif
344 #else
345 #define DEFAULT_CACHE_NODE_LOCK_COUNT   16
346 #endif  /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
347
348 typedef struct {
349         nodelock_t                      lock;
350         /* Protected in the refcount routines. */
351         isc_refcount_t                  references;
352         /* Locked by lock. */
353         isc_boolean_t                   exiting;
354 } rbtdb_nodelock_t;
355
356 typedef struct rbtdb_changed {
357         dns_rbtnode_t *                 node;
358         isc_boolean_t                   dirty;
359         ISC_LINK(struct rbtdb_changed)  link;
360 } rbtdb_changed_t;
361
362 typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
363
364 typedef enum {
365         dns_db_insecure,
366         dns_db_partial,
367         dns_db_secure
368 } dns_db_secure_t;
369
370 typedef struct dns_rbtdb dns_rbtdb_t;
371
372 typedef struct rbtdb_version {
373         /* Not locked */
374         rbtdb_serial_t                  serial;
375         dns_rbtdb_t *                   rbtdb;
376         /*
377          * Protected in the refcount routines.
378          * XXXJT: should we change the lock policy based on the refcount
379          * performance?
380          */
381         isc_refcount_t                  references;
382         /* Locked by database lock. */
383         isc_boolean_t                   writer;
384         isc_boolean_t                   commit_ok;
385         rbtdb_changedlist_t             changed_list;
386         rdatasetheaderlist_t            resigned_list;
387         ISC_LINK(struct rbtdb_version)  link;
388         dns_db_secure_t                 secure;
389         isc_boolean_t                   havensec3;
390         /* NSEC3 parameters */
391         dns_hash_t                      hash;
392         isc_uint8_t                     flags;
393         isc_uint16_t                    iterations;
394         isc_uint8_t                     salt_length;
395         unsigned char                   salt[DNS_NSEC3_SALTSIZE];
396 } rbtdb_version_t;
397
398 typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;
399
400 struct dns_rbtdb {
401         /* Unlocked. */
402         dns_db_t                        common;
403         /* Locks the data in this struct */
404 #if DNS_RBTDB_USERWLOCK
405         isc_rwlock_t                    lock;
406 #else
407         isc_mutex_t                     lock;
408 #endif
409         /* Locks the tree structure (prevents nodes appearing/disappearing) */
410         isc_rwlock_t                    tree_lock;
411         /* Locks for individual tree nodes */
412         unsigned int                    node_lock_count;
413         rbtdb_nodelock_t *              node_locks;
414         dns_rbtnode_t *                 origin_node;
415         dns_stats_t *                   rrsetstats; /* cache DB only */
416         /* Locked by lock. */
417         unsigned int                    active;
418         isc_refcount_t                  references;
419         unsigned int                    attributes;
420         rbtdb_serial_t                  current_serial;
421         rbtdb_serial_t                  least_serial;
422         rbtdb_serial_t                  next_serial;
423         rbtdb_version_t *               current_version;
424         rbtdb_version_t *               future_version;
425         rbtdb_versionlist_t             open_versions;
426         isc_task_t *                    task;
427         dns_dbnode_t                    *soanode;
428         dns_dbnode_t                    *nsnode;
429
430         /*
431          * This is a linked list used to implement the LRU cache.  There will
432          * be node_lock_count linked lists here.  Nodes in bucket 1 will be
433          * placed on the linked list rdatasets[1].
434          */
435         rdatasetheaderlist_t            *rdatasets;
436
437         /*%
438          * Temporary storage for stale cache nodes and dynamically deleted
439          * nodes that await being cleaned up.
440          */
441         rbtnodelist_t                   *deadnodes;
442
443         /*
444          * Heaps.  These are used for TTL based expiry in a cache,
445          * or for zone resigning in a zone DB.  hmctx is the memory
446          * context to use for the heap (which differs from the main
447          * database memory context in the case of a cache).
448          */
449         isc_mem_t *                     hmctx;
450         isc_heap_t                      **heaps;
451
452         /* Locked by tree_lock. */
453         dns_rbt_t *                     tree;
454         dns_rbt_t *                     nsec;
455         dns_rbt_t *                     nsec3;
456         dns_rpz_cidr_t *                rpz_cidr;
457
458         /* Unlocked */
459         unsigned int                    quantum;
460 };
461
462 #define RBTDB_ATTR_LOADED               0x01
463 #define RBTDB_ATTR_LOADING              0x02
464
465 /*%
466  * Search Context
467  */
468 typedef struct {
469         dns_rbtdb_t *           rbtdb;
470         rbtdb_version_t *       rbtversion;
471         rbtdb_serial_t          serial;
472         unsigned int            options;
473         dns_rbtnodechain_t      chain;
474         isc_boolean_t           copy_name;
475         isc_boolean_t           need_cleanup;
476         isc_boolean_t           wild;
477         dns_rbtnode_t *         zonecut;
478         rdatasetheader_t *      zonecut_rdataset;
479         rdatasetheader_t *      zonecut_sigrdataset;
480         dns_fixedname_t         zonecut_name;
481         isc_stdtime_t           now;
482 } rbtdb_search_t;
483
484 /*%
485  * Load Context
486  */
487 typedef struct {
488         dns_rbtdb_t *           rbtdb;
489         isc_stdtime_t           now;
490 } rbtdb_load_t;
491
492 static void rdataset_disassociate(dns_rdataset_t *rdataset);
493 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
494 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
495 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
496 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
497 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
498 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
499                                         dns_name_t *name,
500                                         dns_rdataset_t *neg,
501                                         dns_rdataset_t *negsig);
502 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
503                                         dns_name_t *name,
504                                         dns_rdataset_t *neg,
505                                         dns_rdataset_t *negsig);
506 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
507                                            dns_rdatasetadditional_t type,
508                                            dns_rdatatype_t qtype,
509                                            dns_acache_t *acache,
510                                            dns_zone_t **zonep,
511                                            dns_db_t **dbp,
512                                            dns_dbversion_t **versionp,
513                                            dns_dbnode_t **nodep,
514                                            dns_name_t *fname,
515                                            dns_message_t *msg,
516                                            isc_stdtime_t now);
517 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
518                                            dns_rdatasetadditional_t type,
519                                            dns_rdatatype_t qtype,
520                                            dns_acache_t *acache,
521                                            dns_zone_t *zone,
522                                            dns_db_t *db,
523                                            dns_dbversion_t *version,
524                                            dns_dbnode_t *node,
525                                            dns_name_t *fname);
526 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
527                                            dns_rdataset_t *rdataset,
528                                            dns_rdatasetadditional_t type,
529                                            dns_rdatatype_t qtype);
530 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
531                                               isc_stdtime_t now);
532 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
533                           isc_stdtime_t now);
534 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
535                           isc_boolean_t tree_locked);
536 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
537                           isc_stdtime_t now, isc_boolean_t tree_locked);
538 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
539                                   rdatasetheader_t *newheader);
540 static void prune_tree(isc_task_t *task, isc_event_t *event);
541 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
542 static void rdataset_expire(dns_rdataset_t *rdataset);
543
544 static dns_rdatasetmethods_t rdataset_methods = {
545         rdataset_disassociate,
546         rdataset_first,
547         rdataset_next,
548         rdataset_current,
549         rdataset_clone,
550         rdataset_count,
551         NULL,
552         rdataset_getnoqname,
553         NULL,
554         rdataset_getclosest,
555         rdataset_getadditional,
556         rdataset_setadditional,
557         rdataset_putadditional,
558         rdataset_settrust,
559         rdataset_expire
560 };
561
562 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
563 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
564 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
565 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
566                                  dns_rdataset_t *rdataset);
567
568 static dns_rdatasetitermethods_t rdatasetiter_methods = {
569         rdatasetiter_destroy,
570         rdatasetiter_first,
571         rdatasetiter_next,
572         rdatasetiter_current
573 };
574
575 typedef struct rbtdb_rdatasetiter {
576         dns_rdatasetiter_t              common;
577         rdatasetheader_t *              current;
578 } rbtdb_rdatasetiter_t;
579
580 static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
581 static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
582 static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
583 static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
584                                         dns_name_t *name);
585 static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
586 static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
587 static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
588                                            dns_dbnode_t **nodep,
589                                            dns_name_t *name);
590 static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
591 static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
592                                           dns_name_t *name);
593
594 static dns_dbiteratormethods_t dbiterator_methods = {
595         dbiterator_destroy,
596         dbiterator_first,
597         dbiterator_last,
598         dbiterator_seek,
599         dbiterator_prev,
600         dbiterator_next,
601         dbiterator_current,
602         dbiterator_pause,
603         dbiterator_origin
604 };
605
606 #define DELETION_BATCH_MAX 64
607
608 /*
609  * If 'paused' is ISC_TRUE, then the tree lock is not being held.
610  */
611 typedef struct rbtdb_dbiterator {
612         dns_dbiterator_t                common;
613         isc_boolean_t                   paused;
614         isc_boolean_t                   new_origin;
615         isc_rwlocktype_t                tree_locked;
616         isc_result_t                    result;
617         dns_fixedname_t                 name;
618         dns_fixedname_t                 origin;
619         dns_rbtnodechain_t              chain;
620         dns_rbtnodechain_t              nsec3chain;
621         dns_rbtnodechain_t              *current;
622         dns_rbtnode_t                   *node;
623         dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX];
624         int                             delete;
625         isc_boolean_t                   nsec3only;
626         isc_boolean_t                   nonsec3;
627 } rbtdb_dbiterator_t;
628
629
630 #define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
631 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
632
633 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
634                        isc_event_t *event);
635 static void overmem(dns_db_t *db, isc_boolean_t overmem);
636 #ifdef BIND9
637 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version);
638 #endif
639
640 /*%
641  * 'init_count' is used to initialize 'newheader->count', which in turn
642  * is used to determine where in the cycle rrset-order cyclic starts.
643  * We don't lock this as we don't care about simultaneous updates.
644  *
645  * Note:
646  *      Both init_count and header->count can be ISC_UINT32_MAX.
647  *      The count on the returned rdataset however can't be as
648  *      that indicates that the database does not implement cyclic
649  *      processing.
650  */
651 static unsigned int init_count;
652
653 /*
654  * Locking
655  *
656  * If a routine is going to lock more than one lock in this module, then
657  * the locking must be done in the following order:
658  *
659  *      Tree Lock
660  *
661  *      Node Lock       (Only one from the set may be locked at one time by
662  *                       any caller)
663  *
664  *      Database Lock
665  *
666  * Failure to follow this hierarchy can result in deadlock.
667  */
668
669 /*
670  * Deleting Nodes
671  *
672  * For zone databases the node for the origin of the zone MUST NOT be deleted.
673  */
674
675
676 /*
677  * DB Routines
678  */
679
680 static void
681 attach(dns_db_t *source, dns_db_t **targetp) {
682         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
683
684         REQUIRE(VALID_RBTDB(rbtdb));
685
686         isc_refcount_increment(&rbtdb->references, NULL);
687
688         *targetp = source;
689 }
690
691 static void
692 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
693         dns_rbtdb_t *rbtdb = event->ev_arg;
694
695         UNUSED(task);
696
697         free_rbtdb(rbtdb, ISC_TRUE, event);
698 }
699
700 static void
701 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
702                   isc_boolean_t increment)
703 {
704         dns_rdatastatstype_t statattributes = 0;
705         dns_rdatastatstype_t base = 0;
706         dns_rdatastatstype_t type;
707
708         /* At the moment we count statistics only for cache DB */
709         INSIST(IS_CACHE(rbtdb));
710
711         if (NEGATIVE(header)) {
712                 if (NXDOMAIN(header))
713                         statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
714                 else {
715                         statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
716                         base = RBTDB_RDATATYPE_EXT(header->type);
717                 }
718         } else
719                 base = RBTDB_RDATATYPE_BASE(header->type);
720
721         type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
722         if (increment)
723                 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
724         else
725                 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
726 }
727
728 static void
729 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
730         int idx;
731         isc_heap_t *heap;
732         dns_ttl_t oldttl;
733
734         oldttl = header->rdh_ttl;
735         header->rdh_ttl = newttl;
736
737         if (!IS_CACHE(rbtdb))
738                 return;
739
740         /*
741          * It's possible the rbtdb is not a cache.  If this is the case,
742          * we will not have a heap, and we move on.  If we do, though,
743          * we might need to adjust things.
744          */
745         if (header->heap_index == 0 || newttl == oldttl)
746                 return;
747         idx = header->node->locknum;
748         if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
749             return;
750         heap = rbtdb->heaps[idx];
751
752         if (newttl < oldttl)
753                 isc_heap_increased(heap, header->heap_index);
754         else
755                 isc_heap_decreased(heap, header->heap_index);
756 }
757
758 /*%
759  * These functions allow the heap code to rank the priority of each
760  * element.  It returns ISC_TRUE if v1 happens "sooner" than v2.
761  */
762 static isc_boolean_t
763 ttl_sooner(void *v1, void *v2) {
764         rdatasetheader_t *h1 = v1;
765         rdatasetheader_t *h2 = v2;
766
767         if (h1->rdh_ttl < h2->rdh_ttl)
768                 return (ISC_TRUE);
769         return (ISC_FALSE);
770 }
771
772 static isc_boolean_t
773 resign_sooner(void *v1, void *v2) {
774         rdatasetheader_t *h1 = v1;
775         rdatasetheader_t *h2 = v2;
776
777         if (h1->resign < h2->resign)
778                 return (ISC_TRUE);
779         return (ISC_FALSE);
780 }
781
782 /*%
783  * This function sets the heap index into the header.
784  */
785 static void
786 set_index(void *what, unsigned int index) {
787         rdatasetheader_t *h = what;
788
789         h->heap_index = index;
790 }
791
792 /*%
793  * Work out how many nodes can be deleted in the time between two
794  * requests to the nameserver.  Smooth the resulting number and use it
795  * as a estimate for the number of nodes to be deleted in the next
796  * iteration.
797  */
798 static unsigned int
799 adjust_quantum(unsigned int old, isc_time_t *start) {
800         unsigned int pps = dns_pps;     /* packets per second */
801         unsigned int interval;
802         isc_uint64_t usecs;
803         isc_time_t end;
804         unsigned int new;
805
806         if (pps < 100)
807                 pps = 100;
808         isc_time_now(&end);
809
810         interval = 1000000 / pps;       /* interval in usec */
811         if (interval == 0)
812                 interval = 1;
813         usecs = isc_time_microdiff(&end, start);
814         if (usecs == 0) {
815                 /*
816                  * We were unable to measure the amount of time taken.
817                  * Double the nodes deleted next time.
818                  */
819                 old *= 2;
820                 if (old > 1000)
821                         old = 1000;
822                 return (old);
823         }
824         new = old * interval;
825         new /= (unsigned int)usecs;
826         if (new == 0)
827                 new = 1;
828         else if (new > 1000)
829                 new = 1000;
830
831         /* Smooth */
832         new = (new + old * 3) / 4;
833
834         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
835                       ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
836
837         return (new);
838 }
839
840 static void
841 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
842         unsigned int i;
843         isc_ondestroy_t ondest;
844         isc_result_t result;
845         char buf[DNS_NAME_FORMATSIZE];
846         dns_rbt_t **treep;
847         isc_time_t start;
848
849         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
850                 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
851
852         REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
853         REQUIRE(rbtdb->future_version == NULL);
854
855         if (rbtdb->current_version != NULL) {
856                 unsigned int refs;
857
858                 isc_refcount_decrement(&rbtdb->current_version->references,
859                                        &refs);
860                 INSIST(refs == 0);
861                 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
862                 isc_refcount_destroy(&rbtdb->current_version->references);
863                 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
864                             sizeof(rbtdb_version_t));
865         }
866
867         /*
868          * We assume the number of remaining dead nodes is reasonably small;
869          * the overhead of unlinking all nodes here should be negligible.
870          */
871         for (i = 0; i < rbtdb->node_lock_count; i++) {
872                 dns_rbtnode_t *node;
873
874                 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
875                 while (node != NULL) {
876                         ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
877                         node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
878                 }
879         }
880
881         if (event == NULL)
882                 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
883
884         for (;;) {
885                 /*
886                  * pick the next tree to (start to) destroy
887                  */
888                 treep = &rbtdb->tree;
889                 if (*treep == NULL) {
890                         treep = &rbtdb->nsec;
891                         if (*treep == NULL) {
892                                 treep = &rbtdb->nsec3;
893                                 /*
894                                  * we're finished after clear cutting
895                                  */
896                                 if (*treep == NULL)
897                                         break;
898                         }
899                 }
900
901                 isc_time_now(&start);
902                 result = dns_rbt_destroy2(treep, rbtdb->quantum);
903                 if (result == ISC_R_QUOTA) {
904                         INSIST(rbtdb->task != NULL);
905                         if (rbtdb->quantum != 0)
906                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
907                                                                 &start);
908                         if (event == NULL)
909                                 event = isc_event_allocate(rbtdb->common.mctx,
910                                                            NULL,
911                                                          DNS_EVENT_FREESTORAGE,
912                                                            free_rbtdb_callback,
913                                                            rbtdb,
914                                                            sizeof(isc_event_t));
915                         if (event == NULL)
916                                 continue;
917                         isc_task_send(rbtdb->task, &event);
918                         return;
919                 }
920                 INSIST(result == ISC_R_SUCCESS && *treep == NULL);
921         }
922
923         if (event != NULL)
924                 isc_event_free(&event);
925         if (log) {
926                 if (dns_name_dynamic(&rbtdb->common.origin))
927                         dns_name_format(&rbtdb->common.origin, buf,
928                                         sizeof(buf));
929                 else
930                         strcpy(buf, "<UNKNOWN>");
931                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
932                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
933                               "done free_rbtdb(%s)", buf);
934         }
935         if (dns_name_dynamic(&rbtdb->common.origin))
936                 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
937         for (i = 0; i < rbtdb->node_lock_count; i++) {
938                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
939                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
940         }
941
942         /*
943          * Clean up LRU / re-signing order lists.
944          */
945         if (rbtdb->rdatasets != NULL) {
946                 for (i = 0; i < rbtdb->node_lock_count; i++)
947                         INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
948                 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
949                             rbtdb->node_lock_count *
950                             sizeof(rdatasetheaderlist_t));
951         }
952         /*
953          * Clean up dead node buckets.
954          */
955         if (rbtdb->deadnodes != NULL) {
956                 for (i = 0; i < rbtdb->node_lock_count; i++)
957                         INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
958                 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
959                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
960         }
961         /*
962          * Clean up heap objects.
963          */
964         if (rbtdb->heaps != NULL) {
965                 for (i = 0; i < rbtdb->node_lock_count; i++)
966                         isc_heap_destroy(&rbtdb->heaps[i]);
967                 isc_mem_put(rbtdb->hmctx, rbtdb->heaps,
968                             rbtdb->node_lock_count * sizeof(isc_heap_t *));
969         }
970
971         if (rbtdb->rrsetstats != NULL)
972                 dns_stats_detach(&rbtdb->rrsetstats);
973
974 #ifdef BIND9
975         if (rbtdb->rpz_cidr != NULL)
976                 dns_rpz_cidr_free(&rbtdb->rpz_cidr);
977 #endif
978
979         isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
980                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
981         isc_rwlock_destroy(&rbtdb->tree_lock);
982         isc_refcount_destroy(&rbtdb->references);
983         if (rbtdb->task != NULL)
984                 isc_task_detach(&rbtdb->task);
985
986         RBTDB_DESTROYLOCK(&rbtdb->lock);
987         rbtdb->common.magic = 0;
988         rbtdb->common.impmagic = 0;
989         ondest = rbtdb->common.ondest;
990         isc_mem_detach(&rbtdb->hmctx);
991         isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
992         isc_ondestroy_notify(&ondest, rbtdb);
993 }
994
995 static inline void
996 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
997         isc_boolean_t want_free = ISC_FALSE;
998         unsigned int i;
999         unsigned int inactive = 0;
1000
1001         /* XXX check for open versions here */
1002
1003         if (rbtdb->soanode != NULL)
1004                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
1005         if (rbtdb->nsnode != NULL)
1006                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
1007
1008         /*
1009          * Even though there are no external direct references, there still
1010          * may be nodes in use.
1011          */
1012         for (i = 0; i < rbtdb->node_lock_count; i++) {
1013                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1014                 rbtdb->node_locks[i].exiting = ISC_TRUE;
1015                 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1016                 if (isc_refcount_current(&rbtdb->node_locks[i].references)
1017                     == 0) {
1018                         inactive++;
1019                 }
1020         }
1021
1022         if (inactive != 0) {
1023                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1024                 rbtdb->active -= inactive;
1025                 if (rbtdb->active == 0)
1026                         want_free = ISC_TRUE;
1027                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1028                 if (want_free) {
1029                         char buf[DNS_NAME_FORMATSIZE];
1030                         if (dns_name_dynamic(&rbtdb->common.origin))
1031                                 dns_name_format(&rbtdb->common.origin, buf,
1032                                                 sizeof(buf));
1033                         else
1034                                 strcpy(buf, "<UNKNOWN>");
1035                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1036                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1037                                       "calling free_rbtdb(%s)", buf);
1038                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
1039                 }
1040         }
1041 }
1042
1043 static void
1044 detach(dns_db_t **dbp) {
1045         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1046         unsigned int refs;
1047
1048         REQUIRE(VALID_RBTDB(rbtdb));
1049
1050         isc_refcount_decrement(&rbtdb->references, &refs);
1051
1052         if (refs == 0)
1053                 maybe_free_rbtdb(rbtdb);
1054
1055         *dbp = NULL;
1056 }
1057
1058 static void
1059 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1060         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1061         rbtdb_version_t *version;
1062         unsigned int refs;
1063
1064         REQUIRE(VALID_RBTDB(rbtdb));
1065
1066         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1067         version = rbtdb->current_version;
1068         isc_refcount_increment(&version->references, &refs);
1069         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1070
1071         *versionp = (dns_dbversion_t *)version;
1072 }
1073
1074 static inline rbtdb_version_t *
1075 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1076                  unsigned int references, isc_boolean_t writer)
1077 {
1078         isc_result_t result;
1079         rbtdb_version_t *version;
1080
1081         version = isc_mem_get(mctx, sizeof(*version));
1082         if (version == NULL)
1083                 return (NULL);
1084         version->serial = serial;
1085         result = isc_refcount_init(&version->references, references);
1086         if (result != ISC_R_SUCCESS) {
1087                 isc_mem_put(mctx, version, sizeof(*version));
1088                 return (NULL);
1089         }
1090         version->writer = writer;
1091         version->commit_ok = ISC_FALSE;
1092         ISC_LIST_INIT(version->changed_list);
1093         ISC_LIST_INIT(version->resigned_list);
1094         ISC_LINK_INIT(version, link);
1095
1096         return (version);
1097 }
1098
1099 static isc_result_t
1100 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1101         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1102         rbtdb_version_t *version;
1103
1104         REQUIRE(VALID_RBTDB(rbtdb));
1105         REQUIRE(versionp != NULL && *versionp == NULL);
1106         REQUIRE(rbtdb->future_version == NULL);
1107
1108         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1109         RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
1110         version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1111                                    ISC_TRUE);
1112         if (version != NULL) {
1113                 version->rbtdb = rbtdb;
1114                 version->commit_ok = ISC_TRUE;
1115                 version->secure = rbtdb->current_version->secure;
1116                 version->havensec3 = rbtdb->current_version->havensec3;
1117                 if (version->havensec3) {
1118                         version->flags = rbtdb->current_version->flags;
1119                         version->iterations =
1120                                 rbtdb->current_version->iterations;
1121                         version->hash = rbtdb->current_version->hash;
1122                         version->salt_length =
1123                                 rbtdb->current_version->salt_length;
1124                         memmove(version->salt, rbtdb->current_version->salt,
1125                                 version->salt_length);
1126                 } else {
1127                         version->flags = 0;
1128                         version->iterations = 0;
1129                         version->hash = 0;
1130                         version->salt_length = 0;
1131                         memset(version->salt, 0, sizeof(version->salt));
1132                 }
1133                 rbtdb->next_serial++;
1134                 rbtdb->future_version = version;
1135         }
1136         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1137
1138         if (version == NULL)
1139                 return (ISC_R_NOMEMORY);
1140
1141         *versionp = version;
1142
1143         return (ISC_R_SUCCESS);
1144 }
1145
1146 static void
1147 attachversion(dns_db_t *db, dns_dbversion_t *source,
1148               dns_dbversion_t **targetp)
1149 {
1150         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1151         rbtdb_version_t *rbtversion = source;
1152         unsigned int refs;
1153
1154         REQUIRE(VALID_RBTDB(rbtdb));
1155         INSIST(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
1156
1157         isc_refcount_increment(&rbtversion->references, &refs);
1158         INSIST(refs > 1);
1159
1160         *targetp = rbtversion;
1161 }
1162
1163 static rbtdb_changed_t *
1164 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1165             dns_rbtnode_t *node)
1166 {
1167         rbtdb_changed_t *changed;
1168         unsigned int refs;
1169
1170         /*
1171          * Caller must be holding the node lock if its reference must be
1172          * protected by the lock.
1173          */
1174
1175         changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1176
1177         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1178
1179         REQUIRE(version->writer);
1180
1181         if (changed != NULL) {
1182                 dns_rbtnode_refincrement(node, &refs);
1183                 INSIST(refs != 0);
1184                 changed->node = node;
1185                 changed->dirty = ISC_FALSE;
1186                 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1187         } else
1188                 version->commit_ok = ISC_FALSE;
1189
1190         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1191
1192         return (changed);
1193 }
1194
1195 static void
1196 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1197                  acachectl_t *array)
1198 {
1199         unsigned int count;
1200         unsigned int i;
1201         unsigned char *raw;     /* RDATASLAB */
1202
1203         /*
1204          * The caller must be holding the corresponding node lock.
1205          */
1206
1207         if (array == NULL)
1208                 return;
1209
1210         raw = (unsigned char *)header + sizeof(*header);
1211         count = raw[0] * 256 + raw[1];
1212
1213         /*
1214          * Sanity check: since an additional cache entry has a reference to
1215          * the original DB node (in the callback arg), there should be no
1216          * acache entries when the node can be freed.
1217          */
1218         for (i = 0; i < count; i++)
1219                 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1220
1221         isc_mem_put(mctx, array, count * sizeof(acachectl_t));
1222 }
1223
1224 static inline void
1225 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1226
1227         if (dns_name_dynamic(&(*noqname)->name))
1228                 dns_name_free(&(*noqname)->name, mctx);
1229         if ((*noqname)->neg != NULL)
1230                 isc_mem_put(mctx, (*noqname)->neg,
1231                             dns_rdataslab_size((*noqname)->neg, 0));
1232         if ((*noqname)->negsig != NULL)
1233                 isc_mem_put(mctx, (*noqname)->negsig,
1234                             dns_rdataslab_size((*noqname)->negsig, 0));
1235         isc_mem_put(mctx, *noqname, sizeof(**noqname));
1236         *noqname = NULL;
1237 }
1238
1239 static inline void
1240 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1241 {
1242         ISC_LINK_INIT(h, link);
1243         h->heap_index = 0;
1244
1245 #if TRACE_HEADER
1246         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1247                 fprintf(stderr, "initialized header: %p\n", h);
1248 #else
1249         UNUSED(rbtdb);
1250 #endif
1251 }
1252
1253 static inline rdatasetheader_t *
1254 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1255 {
1256         rdatasetheader_t *h;
1257
1258         h = isc_mem_get(mctx, sizeof(*h));
1259         if (h == NULL)
1260                 return (NULL);
1261
1262 #if TRACE_HEADER
1263         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1264                 fprintf(stderr, "allocated header: %p\n", h);
1265 #endif
1266         init_rdataset(rbtdb, h);
1267         return (h);
1268 }
1269
1270 static inline void
1271 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1272 {
1273         unsigned int size;
1274         int idx;
1275
1276         if (EXISTS(rdataset) &&
1277             (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1278                 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1279         }
1280
1281         idx = rdataset->node->locknum;
1282         if (ISC_LINK_LINKED(rdataset, link)) {
1283                 INSIST(IS_CACHE(rbtdb));
1284                 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1285         }
1286         if (rdataset->heap_index != 0)
1287                 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1288         rdataset->heap_index = 0;
1289
1290         if (rdataset->noqname != NULL)
1291                 free_noqname(mctx, &rdataset->noqname);
1292         if (rdataset->closest != NULL)
1293                 free_noqname(mctx, &rdataset->closest);
1294
1295         free_acachearray(mctx, rdataset, rdataset->additional_auth);
1296         free_acachearray(mctx, rdataset, rdataset->additional_glue);
1297
1298         if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1299                 size = sizeof(*rdataset);
1300         else
1301                 size = dns_rdataslab_size((unsigned char *)rdataset,
1302                                           sizeof(*rdataset));
1303         isc_mem_put(mctx, rdataset, size);
1304 }
1305
1306 static inline void
1307 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1308         rdatasetheader_t *header, *dcurrent;
1309         isc_boolean_t make_dirty = ISC_FALSE;
1310
1311         /*
1312          * Caller must hold the node lock.
1313          */
1314
1315         /*
1316          * We set the IGNORE attribute on rdatasets with serial number
1317          * 'serial'.  When the reference count goes to zero, these rdatasets
1318          * will be cleaned up; until that time, they will be ignored.
1319          */
1320         for (header = node->data; header != NULL; header = header->next) {
1321                 if (header->serial == serial) {
1322                         header->attributes |= RDATASET_ATTR_IGNORE;
1323                         make_dirty = ISC_TRUE;
1324                 }
1325                 for (dcurrent = header->down;
1326                      dcurrent != NULL;
1327                      dcurrent = dcurrent->down) {
1328                         if (dcurrent->serial == serial) {
1329                                 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1330                                 make_dirty = ISC_TRUE;
1331                         }
1332                 }
1333         }
1334         if (make_dirty)
1335                 node->dirty = 1;
1336 }
1337
1338 static inline void
1339 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1340 {
1341         rdatasetheader_t *d, *down_next;
1342
1343         for (d = top->down; d != NULL; d = down_next) {
1344                 down_next = d->down;
1345                 free_rdataset(rbtdb, mctx, d);
1346         }
1347         top->down = NULL;
1348 }
1349
1350 static inline void
1351 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1352         rdatasetheader_t *current, *top_prev, *top_next;
1353         isc_mem_t *mctx = rbtdb->common.mctx;
1354
1355         /*
1356          * Caller must be holding the node lock.
1357          */
1358
1359         top_prev = NULL;
1360         for (current = node->data; current != NULL; current = top_next) {
1361                 top_next = current->next;
1362                 clean_stale_headers(rbtdb, mctx, current);
1363                 /*
1364                  * If current is nonexistent or stale, we can clean it up.
1365                  */
1366                 if ((current->attributes &
1367                      (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1368                         if (top_prev != NULL)
1369                                 top_prev->next = current->next;
1370                         else
1371                                 node->data = current->next;
1372                         free_rdataset(rbtdb, mctx, current);
1373                 } else
1374                         top_prev = current;
1375         }
1376         node->dirty = 0;
1377 }
1378
1379 static inline void
1380 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1381                 rbtdb_serial_t least_serial)
1382 {
1383         rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1384         rdatasetheader_t *top_prev, *top_next;
1385         isc_mem_t *mctx = rbtdb->common.mctx;
1386         isc_boolean_t still_dirty = ISC_FALSE;
1387
1388         /*
1389          * Caller must be holding the node lock.
1390          */
1391         REQUIRE(least_serial != 0);
1392
1393         top_prev = NULL;
1394         for (current = node->data; current != NULL; current = top_next) {
1395                 top_next = current->next;
1396
1397                 /*
1398                  * First, we clean up any instances of multiple rdatasets
1399                  * with the same serial number, or that have the IGNORE
1400                  * attribute.
1401                  */
1402                 dparent = current;
1403                 for (dcurrent = current->down;
1404                      dcurrent != NULL;
1405                      dcurrent = down_next) {
1406                         down_next = dcurrent->down;
1407                         INSIST(dcurrent->serial <= dparent->serial);
1408                         if (dcurrent->serial == dparent->serial ||
1409                             IGNORE(dcurrent)) {
1410                                 if (down_next != NULL)
1411                                         down_next->next = dparent;
1412                                 dparent->down = down_next;
1413                                 free_rdataset(rbtdb, mctx, dcurrent);
1414                         } else
1415                                 dparent = dcurrent;
1416                 }
1417
1418                 /*
1419                  * We've now eliminated all IGNORE datasets with the possible
1420                  * exception of current, which we now check.
1421                  */
1422                 if (IGNORE(current)) {
1423                         down_next = current->down;
1424                         if (down_next == NULL) {
1425                                 if (top_prev != NULL)
1426                                         top_prev->next = current->next;
1427                                 else
1428                                         node->data = current->next;
1429                                 free_rdataset(rbtdb, mctx, current);
1430                                 /*
1431                                  * current no longer exists, so we can
1432                                  * just continue with the loop.
1433                                  */
1434                                 continue;
1435                         } else {
1436                                 /*
1437                                  * Pull up current->down, making it the new
1438                                  * current.
1439                                  */
1440                                 if (top_prev != NULL)
1441                                         top_prev->next = down_next;
1442                                 else
1443                                         node->data = down_next;
1444                                 down_next->next = top_next;
1445                                 free_rdataset(rbtdb, mctx, current);
1446                                 current = down_next;
1447                         }
1448                 }
1449
1450                 /*
1451                  * We now try to find the first down node less than the
1452                  * least serial.
1453                  */
1454                 dparent = current;
1455                 for (dcurrent = current->down;
1456                      dcurrent != NULL;
1457                      dcurrent = down_next) {
1458                         down_next = dcurrent->down;
1459                         if (dcurrent->serial < least_serial)
1460                                 break;
1461                         dparent = dcurrent;
1462                 }
1463
1464                 /*
1465                  * If there is a such an rdataset, delete it and any older
1466                  * versions.
1467                  */
1468                 if (dcurrent != NULL) {
1469                         do {
1470                                 down_next = dcurrent->down;
1471                                 INSIST(dcurrent->serial <= least_serial);
1472                                 free_rdataset(rbtdb, mctx, dcurrent);
1473                                 dcurrent = down_next;
1474                         } while (dcurrent != NULL);
1475                         dparent->down = NULL;
1476                 }
1477
1478                 /*
1479                  * Note.  The serial number of 'current' might be less than
1480                  * least_serial too, but we cannot delete it because it is
1481                  * the most recent version, unless it is a NONEXISTENT
1482                  * rdataset.
1483                  */
1484                 if (current->down != NULL) {
1485                         still_dirty = ISC_TRUE;
1486                         top_prev = current;
1487                 } else {
1488                         /*
1489                          * If this is a NONEXISTENT rdataset, we can delete it.
1490                          */
1491                         if (NONEXISTENT(current)) {
1492                                 if (top_prev != NULL)
1493                                         top_prev->next = current->next;
1494                                 else
1495                                         node->data = current->next;
1496                                 free_rdataset(rbtdb, mctx, current);
1497                         } else
1498                                 top_prev = current;
1499                 }
1500         }
1501         if (!still_dirty)
1502                 node->dirty = 0;
1503 }
1504
1505 static void
1506 delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node)
1507 {
1508         dns_rbtnode_t *nsecnode;
1509         dns_fixedname_t fname;
1510         dns_name_t *name;
1511         isc_result_t result = ISC_R_UNEXPECTED;
1512
1513         INSIST(!ISC_LINK_LINKED(node, deadlink));
1514
1515         switch (node->nsec) {
1516         case DNS_RBT_NSEC_NORMAL:
1517 #ifdef BIND9
1518                 if (rbtdb->rpz_cidr != NULL) {
1519                         dns_fixedname_init(&fname);
1520                         name = dns_fixedname_name(&fname);
1521                         dns_rbt_fullnamefromnode(node, name);
1522                         dns_rpz_cidr_deleteip(rbtdb->rpz_cidr, name);
1523                 }
1524 #endif
1525                 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1526                 break;
1527         case DNS_RBT_NSEC_HAS_NSEC:
1528                 dns_fixedname_init(&fname);
1529                 name = dns_fixedname_name(&fname);
1530                 dns_rbt_fullnamefromnode(node, name);
1531                 /*
1532                  * Delete the corresponding node from the auxiliary NSEC
1533                  * tree before deleting from the main tree.
1534                  */
1535                 nsecnode = NULL;
1536                 result = dns_rbt_findnode(rbtdb->nsec, name, NULL, &nsecnode,
1537                                           NULL, DNS_RBTFIND_EMPTYDATA,
1538                                           NULL, NULL);
1539                 if (result != ISC_R_SUCCESS) {
1540                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1541                                       DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1542                                       "delete_node: "
1543                                       "dns_rbt_findnode(nsec): %s",
1544                                       isc_result_totext(result));
1545                 } else {
1546                         result = dns_rbt_deletenode(rbtdb->nsec, nsecnode,
1547                                                     ISC_FALSE);
1548                         if (result != ISC_R_SUCCESS) {
1549                                 isc_log_write(dns_lctx,
1550                                               DNS_LOGCATEGORY_DATABASE,
1551                                               DNS_LOGMODULE_CACHE,
1552                                               ISC_LOG_WARNING,
1553                                               "delete_node(): "
1554                                               "dns_rbt_deletenode(nsecnode): %s",
1555                                               isc_result_totext(result));
1556                         }
1557                 }
1558 #ifdef BIND9
1559                 if (rbtdb->rpz_cidr != NULL)
1560                         dns_rpz_cidr_deleteip(rbtdb->rpz_cidr, name);
1561 #endif
1562                 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1563                 break;
1564         case DNS_RBT_NSEC_NSEC:
1565                 result = dns_rbt_deletenode(rbtdb->nsec, node, ISC_FALSE);
1566                 break;
1567         case DNS_RBT_NSEC_NSEC3:
1568                 result = dns_rbt_deletenode(rbtdb->nsec3, node, ISC_FALSE);
1569                 break;
1570         }
1571         if (result != ISC_R_SUCCESS) {
1572                 isc_log_write(dns_lctx,
1573                               DNS_LOGCATEGORY_DATABASE,
1574                               DNS_LOGMODULE_CACHE,
1575                               ISC_LOG_WARNING,
1576                               "delete_cnode(): "
1577                               "dns_rbt_deletenode: %s",
1578                               isc_result_totext(result));
1579         }
1580 }
1581
1582 /*%
1583  * Clean up dead nodes.  These are nodes which have no references, and
1584  * have no data.  They are dead but we could not or chose not to delete
1585  * them when we deleted all the data at that node because we did not want
1586  * to wait for the tree write lock.
1587  *
1588  * The caller must hold a tree write lock and bucketnum'th node (write) lock.
1589  */
1590 static void
1591 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1592         dns_rbtnode_t *node;
1593         int count = 10;         /* XXXJT: should be adjustable */
1594
1595         node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1596         while (node != NULL && count > 0) {
1597                 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1598
1599                 /*
1600                  * Since we're holding a tree write lock, it should be
1601                  * impossible for this node to be referenced by others.
1602                  */
1603                 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1604                        node->data == NULL);
1605
1606                 delete_node(rbtdb, node);
1607
1608                 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1609                 count--;
1610         }
1611 }
1612
1613 /*
1614  * Caller must be holding the node lock.
1615  */
1616 static inline void
1617 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1618         unsigned int lockrefs, noderefs;
1619         isc_refcount_t *lockref;
1620
1621         INSIST(!ISC_LINK_LINKED(node, deadlink));
1622         dns_rbtnode_refincrement0(node, &noderefs);
1623         if (noderefs == 1) {    /* this is the first reference to the node */
1624                 lockref = &rbtdb->node_locks[node->locknum].references;
1625                 isc_refcount_increment0(lockref, &lockrefs);
1626                 INSIST(lockrefs != 0);
1627         }
1628         INSIST(noderefs != 0);
1629 }
1630
1631 /*
1632  * This function is assumed to be called when a node is newly referenced
1633  * and can be in the deadnode list.  In that case the node must be retrieved
1634  * from the list because it is going to be used.  In addition, if the caller
1635  * happens to hold a write lock on the tree, it's a good chance to purge dead
1636  * nodes.
1637  * Note: while a new reference is gained in multiple places, there are only very
1638  * few cases where the node can be in the deadnode list (only empty nodes can
1639  * have been added to the list).
1640  */
1641 static inline void
1642 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1643                 isc_rwlocktype_t treelocktype)
1644 {
1645         isc_rwlocktype_t locktype = isc_rwlocktype_read;
1646         nodelock_t *nodelock = &rbtdb->node_locks[node->locknum].lock;
1647         isc_boolean_t maybe_cleanup = ISC_FALSE;
1648
1649         POST(locktype);
1650
1651         NODE_STRONGLOCK(nodelock);
1652         NODE_WEAKLOCK(nodelock, locktype);
1653
1654         /*
1655          * Check if we can possibly cleanup the dead node.  If so, upgrade
1656          * the node lock below to perform the cleanup.
1657          */
1658         if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1659             treelocktype == isc_rwlocktype_write) {
1660                 maybe_cleanup = ISC_TRUE;
1661         }
1662
1663         if (ISC_LINK_LINKED(node, deadlink) || maybe_cleanup) {
1664                 /*
1665                  * Upgrade the lock and test if we still need to unlink.
1666                  */
1667                 NODE_WEAKUNLOCK(nodelock, locktype);
1668                 locktype = isc_rwlocktype_write;
1669                 POST(locktype);
1670                 NODE_WEAKLOCK(nodelock, locktype);
1671                 if (ISC_LINK_LINKED(node, deadlink))
1672                         ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1673                                         node, deadlink);
1674                 if (maybe_cleanup)
1675                         cleanup_dead_nodes(rbtdb, node->locknum);
1676         }
1677
1678         new_reference(rbtdb, node);
1679
1680         NODE_WEAKUNLOCK(nodelock, locktype);
1681         NODE_STRONGUNLOCK(nodelock);
1682 }
1683
1684 /*
1685  * Caller must be holding the node lock; either the "strong", read or write
1686  * lock.  Note that the lock must be held even when node references are
1687  * atomically modified; in that case the decrement operation itself does not
1688  * have to be protected, but we must avoid a race condition where multiple
1689  * threads are decreasing the reference to zero simultaneously and at least
1690  * one of them is going to free the node.
1691  * This function returns ISC_TRUE if and only if the node reference decreases
1692  * to zero.
1693  */
1694 static isc_boolean_t
1695 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1696                     rbtdb_serial_t least_serial,
1697                     isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1698                     isc_boolean_t pruning)
1699 {
1700         isc_result_t result;
1701         isc_boolean_t write_locked;
1702         rbtdb_nodelock_t *nodelock;
1703         unsigned int refs, nrefs;
1704         int bucket = node->locknum;
1705         isc_boolean_t no_reference = ISC_TRUE;
1706
1707         nodelock = &rbtdb->node_locks[bucket];
1708
1709 #define KEEP_NODE(n, r) \
1710         ((n)->data != NULL || (n)->down != NULL || (n) == (r)->origin_node)
1711
1712         /* Handle easy and typical case first. */
1713         if (!node->dirty && KEEP_NODE(node, rbtdb)) {
1714                 dns_rbtnode_refdecrement(node, &nrefs);
1715                 INSIST((int)nrefs >= 0);
1716                 if (nrefs == 0) {
1717                         isc_refcount_decrement(&nodelock->references, &refs);
1718                         INSIST((int)refs >= 0);
1719                 }
1720                 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1721         }
1722
1723         /* Upgrade the lock? */
1724         if (nlock == isc_rwlocktype_read) {
1725                 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1726                 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1727         }
1728
1729         dns_rbtnode_refdecrement(node, &nrefs);
1730         INSIST((int)nrefs >= 0);
1731         if (nrefs > 0) {
1732                 /* Restore the lock? */
1733                 if (nlock == isc_rwlocktype_read)
1734                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1735                 return (ISC_FALSE);
1736         }
1737
1738         if (node->dirty) {
1739                 if (IS_CACHE(rbtdb))
1740                         clean_cache_node(rbtdb, node);
1741                 else {
1742                         if (least_serial == 0) {
1743                                 /*
1744                                  * Caller doesn't know the least serial.
1745                                  * Get it.
1746                                  */
1747                                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1748                                 least_serial = rbtdb->least_serial;
1749                                 RBTDB_UNLOCK(&rbtdb->lock,
1750                                              isc_rwlocktype_read);
1751                         }
1752                         clean_zone_node(rbtdb, node, least_serial);
1753                 }
1754         }
1755
1756         /*
1757          * Attempt to switch to a write lock on the tree.  If this fails,
1758          * we will add this node to a linked list of nodes in this locking
1759          * bucket which we will free later.
1760          */
1761         if (tlock != isc_rwlocktype_write) {
1762                 /*
1763                  * Locking hierarchy notwithstanding, we don't need to free
1764                  * the node lock before acquiring the tree write lock because
1765                  * we only do a trylock.
1766                  */
1767                 if (tlock == isc_rwlocktype_read)
1768                         result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1769                 else
1770                         result = isc_rwlock_trylock(&rbtdb->tree_lock,
1771                                                     isc_rwlocktype_write);
1772                 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1773                               result == ISC_R_LOCKBUSY);
1774
1775                 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1776         } else
1777                 write_locked = ISC_TRUE;
1778
1779         isc_refcount_decrement(&nodelock->references, &refs);
1780         INSIST((int)refs >= 0);
1781
1782         if (KEEP_NODE(node, rbtdb))
1783                 goto restore_locks;
1784
1785 #undef KEEP_NODE
1786
1787         if (write_locked) {
1788                 /*
1789                  * We can now delete the node.
1790                  */
1791
1792                 /*
1793                  * If this node is the only one in the level it's in, deleting
1794                  * this node may recursively make its parent the only node in
1795                  * the parent level; if so, and if no one is currently using
1796                  * the parent node, this is almost the only opportunity to
1797                  * clean it up.  But the recursive cleanup is not that trivial
1798                  * since the child and parent may be in different lock buckets,
1799                  * which would cause a lock order reversal problem.  To avoid
1800                  * the trouble, we'll dispatch a separate event for batch
1801                  * cleaning.  We need to check whether we're deleting the node
1802                  * as a result of pruning to avoid infinite dispatching.
1803                  * Note: pruning happens only when a task has been set for the
1804                  * rbtdb.  If the user of the rbtdb chooses not to set a task,
1805                  * it's their responsibility to purge stale leaves (e.g. by
1806                  * periodic walk-through).
1807                  */
1808                 if (!pruning && node->parent != NULL &&
1809                     node->parent->down == node && node->left == NULL &&
1810                     node->right == NULL && rbtdb->task != NULL) {
1811                         isc_event_t *ev;
1812                         dns_db_t *db;
1813
1814                         ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1815                                                 DNS_EVENT_RBTPRUNE,
1816                                                 prune_tree, node,
1817                                                 sizeof(isc_event_t));
1818                         if (ev != NULL) {
1819                                 new_reference(rbtdb, node);
1820                                 db = NULL;
1821                                 attach((dns_db_t *)rbtdb, &db);
1822                                 ev->ev_sender = db;
1823                                 isc_task_send(rbtdb->task, &ev);
1824                                 no_reference = ISC_FALSE;
1825                         } else {
1826                                 /*
1827                                  * XXX: this is a weird situation.  We could
1828                                  * ignore this error case, but then the stale
1829                                  * node will unlikely be purged except via a
1830                                  * rare condition such as manual cleanup.  So
1831                                  * we queue it in the deadnodes list, hoping
1832                                  * the memory shortage is temporary and the node
1833                                  * will be deleted later.
1834                                  */
1835                                 isc_log_write(dns_lctx,
1836                                               DNS_LOGCATEGORY_DATABASE,
1837                                               DNS_LOGMODULE_CACHE,
1838                                               ISC_LOG_INFO,
1839                                               "decrement_reference: failed to "
1840                                               "allocate pruning event");
1841                                 INSIST(node->data == NULL);
1842                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1843                                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1844                                                 deadlink);
1845                         }
1846                 } else {
1847                         if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1848                                 char printname[DNS_NAME_FORMATSIZE];
1849
1850                                 isc_log_write(dns_lctx,
1851                                               DNS_LOGCATEGORY_DATABASE,
1852                                               DNS_LOGMODULE_CACHE,
1853                                               ISC_LOG_DEBUG(1),
1854                                               "decrement_reference: "
1855                                               "delete from rbt: %p %s",
1856                                               node,
1857                                               dns_rbt_formatnodename(node,
1858                                                         printname,
1859                                                         sizeof(printname)));
1860                         }
1861
1862                         delete_node(rbtdb, node);
1863                 }
1864         } else {
1865                 INSIST(node->data == NULL);
1866                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1867                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1868         }
1869
1870  restore_locks:
1871         /* Restore the lock? */
1872         if (nlock == isc_rwlocktype_read)
1873                 NODE_WEAKDOWNGRADE(&nodelock->lock);
1874
1875         /*
1876          * Relock a read lock, or unlock the write lock if no lock was held.
1877          */
1878         if (tlock == isc_rwlocktype_none)
1879                 if (write_locked)
1880                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1881
1882         if (tlock == isc_rwlocktype_read)
1883                 if (write_locked)
1884                         isc_rwlock_downgrade(&rbtdb->tree_lock);
1885
1886         return (no_reference);
1887 }
1888
1889 /*
1890  * Prune the tree by recursively cleaning-up single leaves.  In the worst
1891  * case, the number of iteration is the number of tree levels, which is at
1892  * most the maximum number of domain name labels, i.e, 127.  In practice, this
1893  * should be much smaller (only a few times), and even the worst case would be
1894  * acceptable for a single event.
1895  */
1896 static void
1897 prune_tree(isc_task_t *task, isc_event_t *event) {
1898         dns_rbtdb_t *rbtdb = event->ev_sender;
1899         dns_rbtnode_t *node = event->ev_arg;
1900         dns_rbtnode_t *parent;
1901         unsigned int locknum;
1902
1903         UNUSED(task);
1904
1905         isc_event_free(&event);
1906
1907         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1908         locknum = node->locknum;
1909         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1910         do {
1911                 parent = node->parent;
1912                 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1913                                     isc_rwlocktype_write, ISC_TRUE);
1914
1915                 if (parent != NULL && parent->down == NULL) {
1916                         /*
1917                          * node was the only down child of the parent and has
1918                          * just been removed.  We'll then need to examine the
1919                          * parent.  Keep the lock if possible; otherwise,
1920                          * release the old lock and acquire one for the parent.
1921                          */
1922                         if (parent->locknum != locknum) {
1923                                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1924                                             isc_rwlocktype_write);
1925                                 locknum = parent->locknum;
1926                                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1927                                           isc_rwlocktype_write);
1928                         }
1929
1930                         /*
1931                          * We need to gain a reference to the node before
1932                          * decrementing it in the next iteration.  In addition,
1933                          * if the node is in the dead-nodes list, extract it
1934                          * from the list beforehand as we do in
1935                          * reactivate_node().
1936                          */
1937                         if (ISC_LINK_LINKED(parent, deadlink))
1938                                 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1939                                                 parent, deadlink);
1940                         new_reference(rbtdb, parent);
1941                 } else
1942                         parent = NULL;
1943
1944                 node = parent;
1945         } while (node != NULL);
1946         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1947         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1948
1949         detach((dns_db_t **)&rbtdb);
1950 }
1951
1952 static inline void
1953 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1954                    rbtdb_changedlist_t *cleanup_list)
1955 {
1956         /*
1957          * Caller must be holding the database lock.
1958          */
1959
1960         rbtdb->least_serial = version->serial;
1961         *cleanup_list = version->changed_list;
1962         ISC_LIST_INIT(version->changed_list);
1963 }
1964
1965 static inline void
1966 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1967         rbtdb_changed_t *changed, *next_changed;
1968
1969         /*
1970          * If the changed record is dirty, then
1971          * an update created multiple versions of
1972          * a given rdataset.  We keep this list
1973          * until we're the least open version, at
1974          * which point it's safe to get rid of any
1975          * older versions.
1976          *
1977          * If the changed record isn't dirty, then
1978          * we don't need it anymore since we're
1979          * committing and not rolling back.
1980          *
1981          * The caller must be holding the database lock.
1982          */
1983         for (changed = HEAD(version->changed_list);
1984              changed != NULL;
1985              changed = next_changed) {
1986                 next_changed = NEXT(changed, link);
1987                 if (!changed->dirty) {
1988                         UNLINK(version->changed_list,
1989                                changed, link);
1990                         APPEND(*cleanup_list,
1991                                changed, link);
1992                 }
1993         }
1994 }
1995
1996 static void
1997 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1998 #ifndef BIND9
1999         UNUSED(db);
2000         UNUSED(version);
2001         UNUSED(origin);
2002
2003         return;
2004 #else
2005         dns_rdataset_t keyset;
2006         dns_rdataset_t nsecset, signsecset;
2007         isc_boolean_t haszonekey = ISC_FALSE;
2008         isc_boolean_t hasnsec = ISC_FALSE;
2009         isc_result_t result;
2010
2011         dns_rdataset_init(&keyset);
2012         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
2013                                      0, 0, &keyset, NULL);
2014         if (result == ISC_R_SUCCESS) {
2015                 result = dns_rdataset_first(&keyset);
2016                 while (result == ISC_R_SUCCESS) {
2017                         dns_rdata_t keyrdata = DNS_RDATA_INIT;
2018                         dns_rdataset_current(&keyset, &keyrdata);
2019                         if (dns_zonekey_iszonekey(&keyrdata)) {
2020                                 haszonekey = ISC_TRUE;
2021                                 break;
2022                         }
2023                         result = dns_rdataset_next(&keyset);
2024                 }
2025                 dns_rdataset_disassociate(&keyset);
2026         }
2027         if (!haszonekey) {
2028                 version->secure = dns_db_insecure;
2029                 version->havensec3 = ISC_FALSE;
2030                 return;
2031         }
2032
2033         dns_rdataset_init(&nsecset);
2034         dns_rdataset_init(&signsecset);
2035         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
2036                                      0, 0, &nsecset, &signsecset);
2037         if (result == ISC_R_SUCCESS) {
2038                 if (dns_rdataset_isassociated(&signsecset)) {
2039                         hasnsec = ISC_TRUE;
2040                         dns_rdataset_disassociate(&signsecset);
2041                 }
2042                 dns_rdataset_disassociate(&nsecset);
2043         }
2044
2045         setnsec3parameters(db, version);
2046
2047         /*
2048          * Do we have a valid NSEC/NSEC3 chain?
2049          */
2050         if (version->havensec3 || hasnsec)
2051                 version->secure = dns_db_secure;
2052         else
2053                 version->secure = dns_db_insecure;
2054 #endif
2055 }
2056
/*%
 * Walk the origin node looking for NSEC3PARAM records and cache the
 * NSEC3 parameters (hash, salt, iterations, flags) in 'version'.
 *
 * version->havensec3 is cleared first and set to ISC_TRUE once a record
 * with flags == 0 and an acceptable hash algorithm has been found.  A
 * record using DNS_NSEC3_UNKNOWNALG is accepted provisionally; the scan
 * carries on in case a record with a supported algorithm follows, and
 * stops at the first such record.
 *
 * The tree read lock and the origin node's read lock are taken for the
 * duration of the scan.
 */
#ifdef BIND9
static void
setnsec3parameters(dns_db_t *db, rbtdb_version_t *version) {
        dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
        dns_rbtnode_t *node;
        dns_rdata_nsec3param_t nsec3param;
        dns_rdata_t rdata = DNS_RDATA_INIT;
        isc_region_t region;
        isc_result_t result;
        rdatasetheader_t *header, *header_next;
        unsigned char *raw;             /* RDATASLAB */
        unsigned int count, length;

        RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
        version->havensec3 = ISC_FALSE;
        node = rbtdb->origin_node;
        NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
                  isc_rwlocktype_read);

        for (header = node->data; header != NULL; header = header_next) {
                header_next = header->next;

                /*
                 * Follow the 'down' chain to the first header that is
                 * visible in this version and not marked to be ignored.
                 */
                while (header != NULL &&
                       (header->serial > version->serial || IGNORE(header)))
                        header = header->down;

                /* Only an existing NSEC3PARAM rdataset is of interest. */
                if (header == NULL || NONEXISTENT(header) ||
                    header->type != dns_rdatatype_nsec3param)
                        continue;

                /*
                 * Find A NSEC3PARAM with a supported algorithm.
                 *
                 * The slab data follows the header and starts with a
                 * two-byte, big-endian record count.
                 */
                raw = (unsigned char *)header + sizeof(*header);
                count = raw[0] * 256 + raw[1]; /* count */
#if DNS_RDATASET_FIXED
                raw += count * 4 + 2;
#else
                raw += 2;
#endif
                for (; count > 0U; count--) {
                        /* Each record is preceded by its length. */
                        length = raw[0] * 256 + raw[1];
#if DNS_RDATASET_FIXED
                        raw += 4;
#else
                        raw += 2;
#endif
                        region.base = raw;
                        region.length = length;
                        raw += length;
                        dns_rdata_fromregion(&rdata,
                                             rbtdb->common.rdclass,
                                             dns_rdatatype_nsec3param,
                                             &region);
                        result = dns_rdata_tostruct(&rdata, &nsec3param,
                                                    NULL);
                        INSIST(result == ISC_R_SUCCESS);
                        dns_rdata_reset(&rdata);

                        /*
                         * Skip records with an unsupported hash (other
                         * than the unknown test algorithm) or with any
                         * flag bits set.
                         */
                        if ((nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
                             !dns_nsec3_supportedhash(nsec3param.hash)) ||
                            nsec3param.flags != 0)
                                continue;

                        memmove(version->salt, nsec3param.salt,
                                nsec3param.salt_length);
                        version->hash = nsec3param.hash;
                        version->salt_length = nsec3param.salt_length;
                        version->iterations = nsec3param.iterations;
                        version->flags = nsec3param.flags;
                        version->havensec3 = ISC_TRUE;

                        /*
                         * Look for a better algorithm than the
                         * unknown test algorithm.
                         */
                        if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
                                goto unlock;
                }
        }
 unlock:
        NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
                    isc_rwlocktype_read);
        RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
}
#endif
2154
2155 static void
2156 cleanup_dead_nodes_callback(isc_task_t *task, isc_event_t *event) {
2157         dns_rbtdb_t *rbtdb = event->ev_arg;
2158         isc_boolean_t again = ISC_FALSE;
2159         unsigned int locknum;
2160         unsigned int refs;
2161
2162         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2163         for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) {
2164                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
2165                           isc_rwlocktype_write);
2166                 cleanup_dead_nodes(rbtdb, locknum);
2167                 if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL)
2168                         again = ISC_TRUE;
2169                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
2170                             isc_rwlocktype_write);
2171         }
2172         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2173         if (again)
2174                 isc_task_send(task, &event);
2175         else {
2176                 isc_event_free(&event);
2177                 isc_refcount_decrement(&rbtdb->references, &refs);
2178                 if (refs == 0)
2179                         maybe_free_rbtdb(rbtdb);
2180         }
2181 }
2182
2183 static void
2184 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2185         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2186         rbtdb_version_t *version, *cleanup_version, *least_greater;
2187         isc_boolean_t rollback = ISC_FALSE;
2188         rbtdb_changedlist_t cleanup_list;
2189         rdatasetheaderlist_t resigned_list;
2190         rbtdb_changed_t *changed, *next_changed;
2191         rbtdb_serial_t serial, least_serial;
2192         dns_rbtnode_t *rbtnode;
2193         unsigned int refs;
2194         rdatasetheader_t *header;
2195         isc_boolean_t writer;
2196
2197         REQUIRE(VALID_RBTDB(rbtdb));
2198         version = (rbtdb_version_t *)*versionp;
2199         INSIST(version->rbtdb == rbtdb);
2200
2201         cleanup_version = NULL;
2202         ISC_LIST_INIT(cleanup_list);
2203         ISC_LIST_INIT(resigned_list);
2204
2205         isc_refcount_decrement(&version->references, &refs);
2206         if (refs > 0) {         /* typical and easy case first */
2207                 if (commit) {
2208                         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2209                         INSIST(!version->writer);
2210                         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2211                 }
2212                 goto end;
2213         }
2214
2215         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2216         serial = version->serial;
2217         writer = version->writer;
2218         if (version->writer) {
2219                 if (commit) {
2220                         unsigned cur_ref;
2221                         rbtdb_version_t *cur_version;
2222
2223                         INSIST(version->commit_ok);
2224                         INSIST(version == rbtdb->future_version);
2225                         /*
2226                          * The current version is going to be replaced.
2227                          * Release the (likely last) reference to it from the
2228                          * DB itself and unlink it from the open list.
2229                          */
2230                         cur_version = rbtdb->current_version;
2231                         isc_refcount_decrement(&cur_version->references,
2232                                                &cur_ref);
2233                         if (cur_ref == 0) {
2234                                 if (cur_version->serial == rbtdb->least_serial)
2235                                         INSIST(EMPTY(cur_version->changed_list));
2236                                 UNLINK(rbtdb->open_versions,
2237                                        cur_version, link);
2238                         }
2239                         if (EMPTY(rbtdb->open_versions)) {
2240                                 /*
2241                                  * We're going to become the least open
2242                                  * version.
2243                                  */
2244                                 make_least_version(rbtdb, version,
2245                                                    &cleanup_list);
2246                         } else {
2247                                 /*
2248                                  * Some other open version is the
2249                                  * least version.  We can't cleanup
2250                                  * records that were changed in this
2251                                  * version because the older versions
2252                                  * may still be in use by an open
2253                                  * version.
2254                                  *
2255                                  * We can, however, discard the
2256                                  * changed records for things that
2257                                  * we've added that didn't exist in
2258                                  * prior versions.
2259                                  */
2260                                 cleanup_nondirty(version, &cleanup_list);
2261                         }
2262                         /*
2263                          * If the (soon to be former) current version
2264                          * isn't being used by anyone, we can clean
2265                          * it up.
2266                          */
2267                         if (cur_ref == 0) {
2268                                 cleanup_version = cur_version;
2269                                 APPENDLIST(version->changed_list,
2270                                            cleanup_version->changed_list,
2271                                            link);
2272                         }
2273                         /*
2274                          * Become the current version.
2275                          */
2276                         version->writer = ISC_FALSE;
2277                         rbtdb->current_version = version;
2278                         rbtdb->current_serial = version->serial;
2279                         rbtdb->future_version = NULL;
2280
2281                         /*
2282                          * Keep the current version in the open list, and
2283                          * gain a reference for the DB itself (see the DB
2284                          * creation function below).  This must be the only
2285                          * case where we need to increment the counter from
2286                          * zero and need to use isc_refcount_increment0().
2287                          */
2288                         isc_refcount_increment0(&version->references,
2289                                                 &cur_ref);
2290                         INSIST(cur_ref == 1);
2291                         PREPEND(rbtdb->open_versions,
2292                                 rbtdb->current_version, link);
2293                         resigned_list = version->resigned_list;
2294                         ISC_LIST_INIT(version->resigned_list);
2295                 } else {
2296                         /*
2297                          * We're rolling back this transaction.
2298                          */
2299                         cleanup_list = version->changed_list;
2300                         ISC_LIST_INIT(version->changed_list);
2301                         resigned_list = version->resigned_list;
2302                         ISC_LIST_INIT(version->resigned_list);
2303                         rollback = ISC_TRUE;
2304                         cleanup_version = version;
2305                         rbtdb->future_version = NULL;
2306                 }
2307         } else {
2308                 if (version != rbtdb->current_version) {
2309                         /*
2310                          * There are no external or internal references
2311                          * to this version and it can be cleaned up.
2312                          */
2313                         cleanup_version = version;
2314
2315                         /*
2316                          * Find the version with the least serial
2317                          * number greater than ours.
2318                          */
2319                         least_greater = PREV(version, link);
2320                         if (least_greater == NULL)
2321                                 least_greater = rbtdb->current_version;
2322
2323                         INSIST(version->serial < least_greater->serial);
2324                         /*
2325                          * Is this the least open version?
2326                          */
2327                         if (version->serial == rbtdb->least_serial) {
2328                                 /*
2329                                  * Yes.  Install the new least open
2330                                  * version.
2331                                  */
2332                                 make_least_version(rbtdb,
2333                                                    least_greater,
2334                                                    &cleanup_list);
2335                         } else {
2336                                 /*
2337                                  * Add any unexecuted cleanups to
2338                                  * those of the least greater version.
2339                                  */
2340                                 APPENDLIST(least_greater->changed_list,
2341                                            version->changed_list,
2342                                            link);
2343                         }
2344                 } else if (version->serial == rbtdb->least_serial)
2345                         INSIST(EMPTY(version->changed_list));
2346                 UNLINK(rbtdb->open_versions, version, link);
2347         }
2348         least_serial = rbtdb->least_serial;
2349         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2350
2351         /*
2352          * Update the zone's secure status.
2353          */
2354         if (writer && commit && !IS_CACHE(rbtdb))
2355                 iszonesecure(db, version, rbtdb->origin_node);
2356
2357         if (cleanup_version != NULL) {
2358                 INSIST(EMPTY(cleanup_version->changed_list));
2359                 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2360                             sizeof(*cleanup_version));
2361         }
2362
2363         /*
2364          * Commit/rollback re-signed headers.
2365          */
2366         for (header = HEAD(resigned_list);
2367              header != NULL;
2368              header = HEAD(resigned_list)) {
2369                 nodelock_t *lock;
2370
2371                 ISC_LIST_UNLINK(resigned_list, header, link);
2372
2373                 lock = &rbtdb->node_locks[header->node->locknum].lock;
2374                 NODE_LOCK(lock, isc_rwlocktype_write);
2375                 if (rollback)
2376                         resign_insert(rbtdb, header->node->locknum, header);
2377                 decrement_reference(rbtdb, header->node, least_serial,
2378                                     isc_rwlocktype_write, isc_rwlocktype_none,
2379                                     ISC_FALSE);
2380                 NODE_UNLOCK(lock, isc_rwlocktype_write);
2381         }
2382
2383         if (!EMPTY(cleanup_list)) {
2384                 isc_event_t *event = NULL;
2385                 isc_rwlocktype_t tlock = isc_rwlocktype_none;
2386
2387                 if (rbtdb->task != NULL)
2388                         event = isc_event_allocate(rbtdb->common.mctx, NULL,
2389                                                    DNS_EVENT_RBTDEADNODES,
2390                                                    cleanup_dead_nodes_callback,
2391                                                    rbtdb, sizeof(isc_event_t));
2392                 if (event == NULL) {
2393                         /*
2394                          * We acquire a tree write lock here in order to make
2395                          * sure that stale nodes will be removed in
2396                          * decrement_reference().  If we didn't have the lock,
2397                          * those nodes could miss the chance to be removed
2398                          * until the server stops.  The write lock is
2399                          * expensive, but this event should be rare enough
2400                          * to justify the cost.
2401                          */
2402                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2403                         tlock = isc_rwlocktype_write;
2404                 }
2405
2406                 for (changed = HEAD(cleanup_list);
2407                      changed != NULL;
2408                      changed = next_changed) {
2409                         nodelock_t *lock;
2410
2411                         next_changed = NEXT(changed, link);
2412                         rbtnode = changed->node;
2413                         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2414
2415                         NODE_LOCK(lock, isc_rwlocktype_write);
2416                         /*
2417                          * This is a good opportunity to purge any dead nodes,
2418                          * so use it.
2419                          */
2420                         if (event == NULL)
2421                                 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2422
2423                         if (rollback)
2424                                 rollback_node(rbtnode, serial);
2425                         decrement_reference(rbtdb, rbtnode, least_serial,
2426                                             isc_rwlocktype_write, tlock,
2427                                             ISC_FALSE);
2428
2429                         NODE_UNLOCK(lock, isc_rwlocktype_write);
2430
2431                         isc_mem_put(rbtdb->common.mctx, changed,
2432                                     sizeof(*changed));
2433                 }
2434                 if (event != NULL) {
2435                         isc_refcount_increment(&rbtdb->references, NULL);
2436                         isc_task_send(rbtdb->task, &event);
2437                 } else
2438                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2439         }
2440
2441  end:
2442         *versionp = NULL;
2443 }
2444
2445 /*
2446  * Add the necessary magic for the wildcard name 'name'
2447  * to be found in 'rbtdb'.
2448  *
2449  * In order for wildcard matching to work correctly in
2450  * zone_find(), we must ensure that a node for the wildcarding
2451  * level exists in the database, and has its 'find_callback'
2452  * and 'wild' bits set.
2453  *
2454  * E.g. if the wildcard name is "*.sub.example." then we
2455  * must ensure that "sub.example." exists and is marked as
2456  * a wildcard level.
2457  */
2458 static isc_result_t
2459 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2460         isc_result_t result;
2461         dns_name_t foundname;
2462         dns_offsets_t offsets;
2463         unsigned int n;
2464         dns_rbtnode_t *node = NULL;
2465
2466         dns_name_init(&foundname, offsets);
2467         n = dns_name_countlabels(name);
2468         INSIST(n >= 2);
2469         n--;
2470         dns_name_getlabelsequence(name, 1, n, &foundname);
2471         result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2472         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2473                 return (result);
2474         if (result == ISC_R_SUCCESS)
2475                 node->nsec = DNS_RBT_NSEC_NORMAL;
2476         node->find_callback = 1;
2477         node->wild = 1;
2478         return (ISC_R_SUCCESS);
2479 }
2480
2481 static isc_result_t
2482 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2483         isc_result_t result;
2484         dns_name_t foundname;
2485         dns_offsets_t offsets;
2486         unsigned int n, l, i;
2487
2488         dns_name_init(&foundname, offsets);
2489         n = dns_name_countlabels(name);
2490         l = dns_name_countlabels(&rbtdb->common.origin);
2491         i = l + 1;
2492         while (i < n) {
2493                 dns_rbtnode_t *node = NULL;     /* dummy */
2494                 dns_name_getlabelsequence(name, n - i, i, &foundname);
2495                 if (dns_name_iswildcard(&foundname)) {
2496                         result = add_wildcard_magic(rbtdb, &foundname);
2497                         if (result != ISC_R_SUCCESS)
2498                                 return (result);
2499                         result = dns_rbt_addnode(rbtdb->tree, &foundname,
2500                                                  &node);
2501                         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2502                                 return (result);
2503                         if (result == ISC_R_SUCCESS)
2504                                 node->nsec = DNS_RBT_NSEC_NORMAL;
2505                 }
2506                 i++;
2507         }
2508         return (ISC_R_SUCCESS);
2509 }
2510
2511 static isc_result_t
2512 findnodeintree(dns_rbtdb_t *rbtdb, dns_rbt_t *tree, dns_name_t *name,
2513                isc_boolean_t create, dns_dbnode_t **nodep)
2514 {
2515         dns_rbtnode_t *node = NULL;
2516         dns_name_t nodename;
2517         isc_result_t result;
2518         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2519
2520         INSIST(tree == rbtdb->tree || tree == rbtdb->nsec3);
2521
2522         dns_name_init(&nodename, NULL);
2523         RWLOCK(&rbtdb->tree_lock, locktype);
2524         result = dns_rbt_findnode(tree, name, NULL, &node, NULL,
2525                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2526         if (result != ISC_R_SUCCESS) {
2527                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2528                 if (!create) {
2529                         if (result == DNS_R_PARTIALMATCH)
2530                                 result = ISC_R_NOTFOUND;
2531                         return (result);
2532                 }
2533                 /*
2534                  * It would be nice to try to upgrade the lock instead of
2535                  * unlocking then relocking.
2536                  */
2537                 locktype = isc_rwlocktype_write;
2538                 RWLOCK(&rbtdb->tree_lock, locktype);
2539                 node = NULL;
2540                 result = dns_rbt_addnode(tree, name, &node);
2541                 if (result == ISC_R_SUCCESS) {
2542 #ifdef BIND9
2543                         if (tree == rbtdb->tree && rbtdb->rpz_cidr != NULL) {
2544                                 dns_fixedname_t fnamef;
2545                                 dns_name_t *fname;
2546
2547                                 dns_fixedname_init(&fnamef);
2548                                 fname = dns_fixedname_name(&fnamef);
2549                                 dns_rbt_fullnamefromnode(node, fname);
2550                                 dns_rpz_cidr_addip(rbtdb->rpz_cidr, fname);
2551                         }
2552 #endif
2553                         dns_rbt_namefromnode(node, &nodename);
2554 #ifdef DNS_RBT_USEHASH
2555                         node->locknum = node->hashval % rbtdb->node_lock_count;
2556 #else
2557                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2558                                 rbtdb->node_lock_count;
2559 #endif
2560                         if (tree == rbtdb->tree) {
2561                                 add_empty_wildcards(rbtdb, name);
2562
2563                                 if (dns_name_iswildcard(name)) {
2564                                         result = add_wildcard_magic(rbtdb, name);
2565                                         if (result != ISC_R_SUCCESS) {
2566                                                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2567                                                 return (result);
2568                                         }
2569                                 }
2570                         }
2571                         if (tree == rbtdb->nsec3)
2572                                 node->nsec = DNS_RBT_NSEC_NSEC3;
2573                 } else if (result != ISC_R_EXISTS) {
2574                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2575                         return (result);
2576                 }
2577         }
2578
2579         if (tree == rbtdb->nsec3)
2580                 INSIST(node->nsec == DNS_RBT_NSEC_NSEC3);
2581
2582         reactivate_node(rbtdb, node, locktype);
2583         RWUNLOCK(&rbtdb->tree_lock, locktype);
2584
2585         *nodep = (dns_dbnode_t *)node;
2586
2587         return (ISC_R_SUCCESS);
2588 }
2589
2590 static isc_result_t
2591 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2592          dns_dbnode_t **nodep)
2593 {
2594         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2595
2596         REQUIRE(VALID_RBTDB(rbtdb));
2597
2598         return (findnodeintree(rbtdb, rbtdb->tree, name, create, nodep));
2599 }
2600
2601 static isc_result_t
2602 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2603               dns_dbnode_t **nodep)
2604 {
2605         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2606
2607         REQUIRE(VALID_RBTDB(rbtdb));
2608
2609         return (findnodeintree(rbtdb, rbtdb->nsec3, name, create, nodep));
2610 }
2611
2612 static isc_result_t
2613 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2614         rbtdb_search_t *search = arg;
2615         rdatasetheader_t *header, *header_next;
2616         rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2617         rdatasetheader_t *found;
2618         isc_result_t result;
2619         dns_rbtnode_t *onode;
2620
2621         /*
2622          * We only want to remember the topmost zone cut, since it's the one
2623          * that counts, so we'll just continue if we've already found a
2624          * zonecut.
2625          */
2626         if (search->zonecut != NULL)
2627                 return (DNS_R_CONTINUE);
2628
2629         found = NULL;
2630         result = DNS_R_CONTINUE;
2631         onode = search->rbtdb->origin_node;
2632
2633         NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2634                   isc_rwlocktype_read);
2635
2636         /*
2637          * Look for an NS or DNAME rdataset active in our version.
2638          */
2639         ns_header = NULL;
2640         dname_header = NULL;
2641         sigdname_header = NULL;
2642         for (header = node->data; header != NULL; header = header_next) {
2643                 header_next = header->next;
2644                 if (header->type == dns_rdatatype_ns ||
2645                     header->type == dns_rdatatype_dname ||
2646                     header->type == RBTDB_RDATATYPE_SIGDNAME) {
2647                         do {
2648                                 if (header->serial <= search->serial &&
2649                                     !IGNORE(header)) {
2650                                         /*
2651                                          * Is this a "this rdataset doesn't
2652                                          * exist" record?
2653                                          */
2654                                         if (NONEXISTENT(header))
2655                                                 header = NULL;
2656                                         break;
2657                                 } else
2658                                         header = header->down;
2659                         } while (header != NULL);
2660                         if (header != NULL) {
2661                                 if (header->type == dns_rdatatype_dname)
2662                                         dname_header = header;
2663                                 else if (header->type ==
2664                                            RBTDB_RDATATYPE_SIGDNAME)
2665                                         sigdname_header = header;
2666                                 else if (node != onode ||
2667                                          IS_STUB(search->rbtdb)) {
2668                                         /*
2669                                          * We've found an NS rdataset that
2670                                          * isn't at the origin node.  We check
2671                                          * that they're not at the origin node,
2672                                          * because otherwise we'd erroneously
2673                                          * treat the zone top as if it were
2674                                          * a delegation.
2675                                          */
2676                                         ns_header = header;
2677                                 }
2678                         }
2679                 }
2680         }
2681
2682         /*
2683          * Did we find anything?
2684          */
2685         if (!IS_CACHE(search->rbtdb) && !IS_STUB(search->rbtdb) &&
2686             ns_header != NULL) {
2687                 /*
2688                  * Note that NS has precedence over DNAME if both exist
2689                  * in a zone.  Otherwise DNAME take precedence over NS.
2690                  */
2691                 found = ns_header;
2692                 search->zonecut_sigrdataset = NULL;
2693         } else if (dname_header != NULL) {
2694                 found = dname_header;
2695                 search->zonecut_sigrdataset = sigdname_header;
2696         } else if (ns_header != NULL) {
2697                 found = ns_header;
2698                 search->zonecut_sigrdataset = NULL;
2699         }
2700
2701         if (found != NULL) {
2702                 /*
2703                  * We increment the reference count on node to ensure that
2704                  * search->zonecut_rdataset will still be valid later.
2705                  */
2706                 new_reference(search->rbtdb, node);
2707                 search->zonecut = node;
2708                 search->zonecut_rdataset = found;
2709                 search->need_cleanup = ISC_TRUE;
2710                 /*
2711                  * Since we've found a zonecut, anything beneath it is
2712                  * glue and is not subject to wildcard matching, so we
2713                  * may clear search->wild.
2714                  */
2715                 search->wild = ISC_FALSE;
2716                 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2717                         /*
2718                          * If the caller does not want to find glue, then
2719                          * this is the best answer and the search should
2720                          * stop now.
2721                          */
2722                         result = DNS_R_PARTIALMATCH;
2723                 } else {
2724                         dns_name_t *zcname;
2725
2726                         /*
2727                          * The search will continue beneath the zone cut.
2728                          * This may or may not be the best match.  In case it
2729                          * is, we need to remember the node name.
2730                          */
2731                         zcname = dns_fixedname_name(&search->zonecut_name);
2732                         RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2733                                       ISC_R_SUCCESS);
2734                         search->copy_name = ISC_TRUE;
2735                 }
2736         } else {
2737                 /*
2738                  * There is no zonecut at this node which is active in this
2739                  * version.
2740                  *
2741                  * If this is a "wild" node and the caller hasn't disabled
2742                  * wildcard matching, remember that we've seen a wild node
2743                  * in case we need to go searching for wildcard matches
2744                  * later on.
2745                  */
2746                 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2747                         search->wild = ISC_TRUE;
2748         }
2749
2750         NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2751                     isc_rwlocktype_read);
2752
2753         return (result);
2754 }
2755
2756 static inline void
2757 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2758               rdatasetheader_t *header, isc_stdtime_t now,
2759               dns_rdataset_t *rdataset)
2760 {
2761         unsigned char *raw;     /* RDATASLAB */
2762
2763         /*
2764          * Caller must be holding the node reader lock.
2765          * XXXJT: technically, we need a writer lock, since we'll increment
2766          * the header count below.  However, since the actual counter value
2767          * doesn't matter, we prioritize performance here.  (We may want to
2768          * use atomic increment when available).
2769          */
2770
2771         if (rdataset == NULL)
2772                 return;
2773
2774         new_reference(rbtdb, node);
2775
2776         INSIST(rdataset->methods == NULL);      /* We must be disassociated. */
2777
2778         rdataset->methods = &rdataset_methods;
2779         rdataset->rdclass = rbtdb->common.rdclass;
2780         rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2781         rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2782         rdataset->ttl = header->rdh_ttl - now;
2783         rdataset->trust = header->trust;
2784         if (NEGATIVE(header))
2785                 rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE;
2786         if (NXDOMAIN(header))
2787                 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2788         if (OPTOUT(header))
2789                 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2790         rdataset->private1 = rbtdb;
2791         rdataset->private2 = node;
2792         raw = (unsigned char *)header + sizeof(*header);
2793         rdataset->private3 = raw;
2794         rdataset->count = header->count++;
2795         if (rdataset->count == ISC_UINT32_MAX)
2796                 rdataset->count = 0;
2797
2798         /*
2799          * Reset iterator state.
2800          */
2801         rdataset->privateuint4 = 0;
2802         rdataset->private5 = NULL;
2803
2804         /*
2805          * Add noqname proof.
2806          */
2807         rdataset->private6 = header->noqname;
2808         if (rdataset->private6 != NULL)
2809                 rdataset->attributes |=  DNS_RDATASETATTR_NOQNAME;
2810         rdataset->private7 = header->closest;
2811         if (rdataset->private7 != NULL)
2812                 rdataset->attributes |=  DNS_RDATASETATTR_CLOSEST;
2813
2814         /*
2815          * Copy out re-signing information.
2816          */
2817         if (RESIGN(header)) {
2818                 rdataset->attributes |=  DNS_RDATASETATTR_RESIGN;
2819                 rdataset->resign = header->resign;
2820         } else
2821                 rdataset->resign = 0;
2822 }
2823
2824 static inline isc_result_t
2825 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2826                  dns_name_t *foundname, dns_rdataset_t *rdataset,
2827                  dns_rdataset_t *sigrdataset)
2828 {
2829         isc_result_t result;
2830         dns_name_t *zcname;
2831         rbtdb_rdatatype_t type;
2832         dns_rbtnode_t *node;
2833
2834         /*
2835          * The caller MUST NOT be holding any node locks.
2836          */
2837
2838         node = search->zonecut;
2839         type = search->zonecut_rdataset->type;
2840
2841         /*
2842          * If we have to set foundname, we do it before anything else.
2843          * If we were to set foundname after we had set nodep or bound the
2844          * rdataset, then we'd have to undo that work if dns_name_copy()
2845          * failed.  By setting foundname first, there's nothing to undo if
2846          * we have trouble.
2847          */
2848         if (foundname != NULL && search->copy_name) {
2849                 zcname = dns_fixedname_name(&search->zonecut_name);
2850                 result = dns_name_copy(zcname, foundname, NULL);
2851                 if (result != ISC_R_SUCCESS)
2852                         return (result);
2853         }
2854         if (nodep != NULL) {
2855                 /*
2856                  * Note that we don't have to increment the node's reference
2857                  * count here because we're going to use the reference we
2858                  * already have in the search block.
2859                  */
2860                 *nodep = node;
2861                 search->need_cleanup = ISC_FALSE;
2862         }
2863         if (rdataset != NULL) {
2864                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2865                           isc_rwlocktype_read);
2866                 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2867                               search->now, rdataset);
2868                 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2869                         bind_rdataset(search->rbtdb, node,
2870                                       search->zonecut_sigrdataset,
2871                                       search->now, sigrdataset);
2872                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2873                             isc_rwlocktype_read);
2874         }
2875
2876         if (type == dns_rdatatype_dname)
2877                 return (DNS_R_DNAME);
2878         return (DNS_R_DELEGATION);
2879 }
2880
2881 static inline isc_boolean_t
2882 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2883            dns_rbtnode_t *node)
2884 {
2885         unsigned char *raw;     /* RDATASLAB */
2886         unsigned int count, size;
2887         dns_name_t ns_name;
2888         isc_boolean_t valid = ISC_FALSE;
2889         dns_offsets_t offsets;
2890         isc_region_t region;
2891         rdatasetheader_t *header;
2892
2893         /*
2894          * No additional locking is required.
2895          */
2896
2897         /*
2898          * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
2899          * if it occurs at a zone cut, but is not valid below it.
2900          */
2901         if (type == dns_rdatatype_ns) {
2902                 if (node != search->zonecut) {
2903                         return (ISC_FALSE);
2904                 }
2905         } else if (type != dns_rdatatype_a &&
2906                    type != dns_rdatatype_aaaa &&
2907                    type != dns_rdatatype_a6) {
2908                 return (ISC_FALSE);
2909         }
2910
2911         header = search->zonecut_rdataset;
2912         raw = (unsigned char *)header + sizeof(*header);
2913         count = raw[0] * 256 + raw[1];
2914 #if DNS_RDATASET_FIXED
2915         raw += 2 + (4 * count);
2916 #else
2917         raw += 2;
2918 #endif
2919
2920         while (count > 0) {
2921                 count--;
2922                 size = raw[0] * 256 + raw[1];
2923 #if DNS_RDATASET_FIXED
2924                 raw += 4;
2925 #else
2926                 raw += 2;
2927 #endif
2928                 region.base = raw;
2929                 region.length = size;
2930                 raw += size;
2931                 /*
2932                  * XXX Until we have rdata structures, we have no choice but
2933                  * to directly access the rdata format.
2934                  */
2935                 dns_name_init(&ns_name, offsets);
2936                 dns_name_fromregion(&ns_name, &region);
2937                 if (dns_name_compare(&ns_name, name) == 0) {
2938                         valid = ISC_TRUE;
2939                         break;
2940                 }
2941         }
2942
2943         return (valid);
2944 }
2945
2946 static inline isc_boolean_t
2947 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2948             dns_name_t *name)
2949 {
2950         dns_fixedname_t fnext;
2951         dns_fixedname_t forigin;
2952         dns_name_t *next;
2953         dns_name_t *origin;
2954         dns_name_t prefix;
2955         dns_rbtdb_t *rbtdb;
2956         dns_rbtnode_t *node;
2957         isc_result_t result;
2958         isc_boolean_t answer = ISC_FALSE;
2959         rdatasetheader_t *header;
2960
2961         rbtdb = search->rbtdb;
2962
2963         dns_name_init(&prefix, NULL);
2964         dns_fixedname_init(&fnext);
2965         next = dns_fixedname_name(&fnext);
2966         dns_fixedname_init(&forigin);
2967         origin = dns_fixedname_name(&forigin);
2968
2969         result = dns_rbtnodechain_next(chain, NULL, NULL);
2970         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2971                 node = NULL;
2972                 result = dns_rbtnodechain_current(chain, &prefix,
2973                                                   origin, &node);
2974                 if (result != ISC_R_SUCCESS)
2975                         break;
2976                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2977                           isc_rwlocktype_read);
2978                 for (header = node->data;
2979                      header != NULL;
2980                      header = header->next) {
2981                         if (header->serial <= search->serial &&
2982                             !IGNORE(header) && EXISTS(header))
2983                                 break;
2984                 }
2985                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2986                             isc_rwlocktype_read);
2987                 if (header != NULL)
2988                         break;
2989                 result = dns_rbtnodechain_next(chain, NULL, NULL);
2990         }
2991         if (result == ISC_R_SUCCESS)
2992                 result = dns_name_concatenate(&prefix, origin, next, NULL);
2993         if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
2994                 answer = ISC_TRUE;
2995         return (answer);
2996 }
2997
2998 static inline isc_boolean_t
2999 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
3000         dns_fixedname_t fnext;
3001         dns_fixedname_t forigin;
3002         dns_fixedname_t fprev;
3003         dns_name_t *next;
3004         dns_name_t *origin;
3005         dns_name_t *prev;
3006         dns_name_t name;
3007         dns_name_t rname;
3008         dns_name_t tname;
3009         dns_rbtdb_t *rbtdb;
3010         dns_rbtnode_t *node;
3011         dns_rbtnodechain_t chain;
3012         isc_boolean_t check_next = ISC_TRUE;
3013         isc_boolean_t check_prev = ISC_TRUE;
3014         isc_boolean_t answer = ISC_FALSE;
3015         isc_result_t result;
3016         rdatasetheader_t *header;
3017         unsigned int n;
3018
3019         rbtdb = search->rbtdb;
3020
3021         dns_name_init(&name, NULL);
3022         dns_name_init(&tname, NULL);
3023         dns_name_init(&rname, NULL);
3024         dns_fixedname_init(&fnext);
3025         next = dns_fixedname_name(&fnext);
3026         dns_fixedname_init(&fprev);
3027         prev = dns_fixedname_name(&fprev);
3028         dns_fixedname_init(&forigin);
3029         origin = dns_fixedname_name(&forigin);
3030
3031         /*
3032          * Find if qname is at or below a empty node.
3033          * Use our own copy of the chain.
3034          */
3035
3036         chain = search->chain;
3037         do {
3038                 node = NULL;
3039                 result = dns_rbtnodechain_current(&chain, &name,
3040                                                   origin, &node);
3041                 if (result != ISC_R_SUCCESS)
3042                         break;
3043                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3044                           isc_rwlocktype_read);
3045                 for (header = node->data;
3046                      header != NULL;
3047                      header = header->next) {
3048                         if (header->serial <= search->serial &&
3049                             !IGNORE(header) && EXISTS(header))
3050                                 break;
3051                 }
3052                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3053                             isc_rwlocktype_read);
3054                 if (header != NULL)
3055                         break;
3056                 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
3057         } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
3058         if (result == ISC_R_SUCCESS)
3059                 result = dns_name_concatenate(&name, origin, prev, NULL);
3060         if (result != ISC_R_SUCCESS)
3061                 check_prev = ISC_FALSE;
3062
3063         result = dns_rbtnodechain_next(&chain, NULL, NULL);
3064         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3065                 node = NULL;
3066                 result = dns_rbtnodechain_current(&chain, &name,
3067                                                   origin, &node);
3068                 if (result != ISC_R_SUCCESS)
3069                         break;
3070                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3071                           isc_rwlocktype_read);
3072                 for (header = node->data;
3073                      header != NULL;
3074                      header = header->next) {
3075                         if (header->serial <= search->serial &&
3076                             !IGNORE(header) && EXISTS(header))
3077                                 break;
3078                 }
3079                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3080                             isc_rwlocktype_read);
3081                 if (header != NULL)
3082                         break;
3083                 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3084         }
3085         if (result == ISC_R_SUCCESS)
3086                 result = dns_name_concatenate(&name, origin, next, NULL);
3087         if (result != ISC_R_SUCCESS)
3088                 check_next = ISC_FALSE;
3089
3090         dns_name_clone(qname, &rname);
3091
3092         /*
3093          * Remove the wildcard label to find the terminal name.
3094          */
3095         n = dns_name_countlabels(wname);
3096         dns_name_getlabelsequence(wname, 1, n - 1, &tname);
3097
3098         do {
3099                 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3100                     (check_next && dns_name_issubdomain(next, &rname))) {
3101                         answer = ISC_TRUE;
3102                         break;
3103                 }
3104                 /*
3105                  * Remove the left hand label.
3106                  */
3107                 n = dns_name_countlabels(&rname);
3108                 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3109         } while (!dns_name_equal(&rname, &tname));
3110         return (answer);
3111 }
3112
3113 static inline isc_result_t
3114 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3115               dns_name_t *qname)
3116 {
3117         unsigned int i, j;
3118         dns_rbtnode_t *node, *level_node, *wnode;
3119         rdatasetheader_t *header;
3120         isc_result_t result = ISC_R_NOTFOUND;
3121         dns_name_t name;
3122         dns_name_t *wname;
3123         dns_fixedname_t fwname;
3124         dns_rbtdb_t *rbtdb;
3125         isc_boolean_t done, wild, active;
3126         dns_rbtnodechain_t wchain;
3127
3128         /*
3129          * Caller must be holding the tree lock and MUST NOT be holding
3130          * any node locks.
3131          */
3132
3133         /*
3134          * Examine each ancestor level.  If the level's wild bit
3135          * is set, then construct the corresponding wildcard name and
3136          * search for it.  If the wildcard node exists, and is active in
3137          * this version, we're done.  If not, then we next check to see
3138          * if the ancestor is active in this version.  If so, then there
3139          * can be no possible wildcard match and again we're done.  If not,
3140          * continue the search.
3141          */
3142
3143         rbtdb = search->rbtdb;
3144         i = search->chain.level_matches;
3145         done = ISC_FALSE;
3146         node = *nodep;
3147         do {
3148                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3149                           isc_rwlocktype_read);
3150
3151                 /*
3152                  * First we try to figure out if this node is active in
3153                  * the search's version.  We do this now, even though we
3154                  * may not need the information, because it simplifies the
3155                  * locking and code flow.
3156                  */
3157                 for (header = node->data;
3158                      header != NULL;
3159                      header = header->next) {
3160                         if (header->serial <= search->serial &&
3161                             !IGNORE(header) && EXISTS(header))
3162                                 break;
3163                 }
3164                 if (header != NULL)
3165                         active = ISC_TRUE;
3166                 else
3167                         active = ISC_FALSE;
3168
3169                 if (node->wild)
3170                         wild = ISC_TRUE;
3171                 else
3172                         wild = ISC_FALSE;
3173
3174                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3175                             isc_rwlocktype_read);
3176
3177                 if (wild) {
3178                         /*
3179                          * Construct the wildcard name for this level.
3180                          */
3181                         dns_name_init(&name, NULL);
3182                         dns_rbt_namefromnode(node, &name);
3183                         dns_fixedname_init(&fwname);
3184                         wname = dns_fixedname_name(&fwname);
3185                         result = dns_name_concatenate(dns_wildcardname, &name,
3186                                                       wname, NULL);
3187                         j = i;
3188                         while (result == ISC_R_SUCCESS && j != 0) {
3189                                 j--;
3190                                 level_node = search->chain.levels[j];
3191                                 dns_name_init(&name, NULL);
3192                                 dns_rbt_namefromnode(level_node, &name);
3193                                 result = dns_name_concatenate(wname,
3194                                                               &name,
3195                                                               wname,
3196                                                               NULL);
3197                         }
3198                         if (result != ISC_R_SUCCESS)
3199                                 break;
3200
3201                         wnode = NULL;
3202                         dns_rbtnodechain_init(&wchain, NULL);
3203                         result = dns_rbt_findnode(rbtdb->tree, wname,
3204                                                   NULL, &wnode, &wchain,
3205                                                   DNS_RBTFIND_EMPTYDATA,
3206                                                   NULL, NULL);
3207                         if (result == ISC_R_SUCCESS) {
3208                                 nodelock_t *lock;
3209
3210                                 /*
3211                                  * We have found the wildcard node.  If it
3212                                  * is active in the search's version, we're
3213                                  * done.
3214                                  */
3215                                 lock = &rbtdb->node_locks[wnode->locknum].lock;
3216                                 NODE_LOCK(lock, isc_rwlocktype_read);
3217                                 for (header = wnode->data;
3218                                      header != NULL;
3219                                      header = header->next) {
3220                                         if (header->serial <= search->serial &&
3221                                             !IGNORE(header) && EXISTS(header))
3222                                                 break;
3223                                 }
3224                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3225                                 if (header != NULL ||
3226                                     activeempty(search, &wchain, wname)) {
3227                                         if (activeemtpynode(search, qname,
3228                                                             wname)) {
3229                                                 return (ISC_R_NOTFOUND);
3230                                         }
3231                                         /*
3232                                          * The wildcard node is active!
3233                                          *
3234                                          * Note: result is still ISC_R_SUCCESS
3235                                          * so we don't have to set it.
3236                                          */
3237                                         *nodep = wnode;
3238                                         break;
3239                                 }
3240                         } else if (result != ISC_R_NOTFOUND &&
3241                                    result != DNS_R_PARTIALMATCH) {
3242                                 /*
3243                                  * An error has occurred.  Bail out.
3244                                  */
3245                                 break;
3246                         }
3247                 }
3248
3249                 if (active) {
3250                         /*
3251                          * The level node is active.  Any wildcarding
3252                          * present at higher levels has no
3253                          * effect and we're done.
3254                          */
3255                         result = ISC_R_NOTFOUND;
3256                         break;
3257                 }
3258
3259                 if (i > 0) {
3260                         i--;
3261                         node = search->chain.levels[i];
3262                 } else
3263                         done = ISC_TRUE;
3264         } while (!done);
3265
3266         return (result);
3267 }
3268
3269 static isc_boolean_t
3270 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3271 {
3272         dns_rdata_t rdata = DNS_RDATA_INIT;
3273         dns_rdata_nsec3_t nsec3;
3274         unsigned char *raw;                     /* RDATASLAB */
3275         unsigned int rdlen, count;
3276         isc_region_t region;
3277         isc_result_t result;
3278
3279         REQUIRE(header->type == dns_rdatatype_nsec3);
3280
3281         raw = (unsigned char *)header + sizeof(*header);
3282         count = raw[0] * 256 + raw[1]; /* count */
3283 #if DNS_RDATASET_FIXED
3284         raw += count * 4 + 2;
3285 #else
3286         raw += 2;
3287 #endif
3288         while (count-- > 0) {
3289                 rdlen = raw[0] * 256 + raw[1];
3290 #if DNS_RDATASET_FIXED
3291                 raw += 4;
3292 #else
3293                 raw += 2;
3294 #endif
3295                 region.base = raw;
3296                 region.length = rdlen;
3297                 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3298                                      dns_rdatatype_nsec3, &region);
3299                 raw += rdlen;
3300                 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3301                 INSIST(result == ISC_R_SUCCESS);
3302                 if (nsec3.hash == search->rbtversion->hash &&
3303                     nsec3.iterations == search->rbtversion->iterations &&
3304                     nsec3.salt_length == search->rbtversion->salt_length &&
3305                     memcmp(nsec3.salt, search->rbtversion->salt,
3306                            nsec3.salt_length) == 0)
3307                         return (ISC_TRUE);
3308                 dns_rdata_reset(&rdata);
3309         }
3310         return (ISC_FALSE);
3311 }
3312
3313 /*
3314  * Find node of the NSEC/NSEC3 record that is 'name'.
3315  */
3316 static inline isc_result_t
3317 previous_closest_nsec(dns_rdatatype_t type, rbtdb_search_t *search,
3318                     dns_name_t *name, dns_name_t *origin,
3319                     dns_rbtnode_t **nodep, dns_rbtnodechain_t *nsecchain,
3320                     isc_boolean_t *firstp)
3321 {
3322         dns_fixedname_t ftarget;
3323         dns_name_t *target;
3324         dns_rbtnode_t *nsecnode;
3325         isc_result_t result;
3326
3327         REQUIRE(nodep != NULL && *nodep == NULL);
3328
3329         if (type == dns_rdatatype_nsec3) {
3330                 result = dns_rbtnodechain_prev(&search->chain, NULL, NULL);
3331                 if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN)
3332                         return (result);
3333                 result = dns_rbtnodechain_current(&search->chain, name, origin,
3334                                                   nodep);
3335                 return (result);
3336         }
3337
3338         dns_fixedname_init(&ftarget);
3339         target = dns_fixedname_name(&ftarget);
3340
3341         for (;;) {
3342                 if (*firstp) {
3343                         /*
3344                          * Construct the name of the second node to check.
3345                          * It is the first node sought in the NSEC tree.
3346                          */
3347                         *firstp = ISC_FALSE;
3348                         dns_rbtnodechain_init(nsecchain, NULL);
3349                         result = dns_name_concatenate(name, origin,
3350                                                       target, NULL);
3351                         if (result != ISC_R_SUCCESS)
3352                                 return (result);
3353                         nsecnode = NULL;
3354                         result = dns_rbt_findnode(search->rbtdb->nsec,
3355                                                   target, NULL,
3356                                                   &nsecnode, nsecchain,
3357                                                   DNS_RBTFIND_NOOPTIONS,
3358                                                   NULL, NULL);
3359                         if (result == ISC_R_SUCCESS) {
3360                                 /*
3361                                  * Since this was the first loop, finding the
3362                                  * name in the NSEC tree implies that the first
3363                                  * node checked in the main tree had an
3364                                  * unacceptable NSEC record.
3365                                  * Try the previous node in the NSEC tree.
3366                                  */
3367                                 result = dns_rbtnodechain_prev(nsecchain,
3368                                                                name, origin);
3369                                 if (result == DNS_R_NEWORIGIN)
3370                                         result = ISC_R_SUCCESS;
3371                         } else if (result == ISC_R_NOTFOUND ||
3372                                    result == DNS_R_PARTIALMATCH) {
3373                                 result = dns_rbtnodechain_current(nsecchain,
3374                                                         name, origin, NULL);
3375                                 if (result == ISC_R_NOTFOUND)
3376                                         result = ISC_R_NOMORE;
3377                         }
3378                 } else {
3379                         /*
3380                          * This is a second or later trip through the auxiliary
3381                          * tree for the name of a third or earlier NSEC node in
3382                          * the main tree.  Previous trips through the NSEC tree
3383                          * must have found nodes in the main tree with NSEC
3384                          * records.  Perhaps they lacked signature records.
3385                          */
3386                         result = dns_rbtnodechain_prev(nsecchain, name, origin);
3387                         if (result == DNS_R_NEWORIGIN)
3388                                 result = ISC_R_SUCCESS;
3389                 }
3390                 if (result != ISC_R_SUCCESS)
3391                         return (result);
3392
3393                 /*
3394                  * Construct the name to seek in the main tree.
3395                  */
3396                 result = dns_name_concatenate(name, origin, target, NULL);
3397                 if (result != ISC_R_SUCCESS)
3398                         return (result);
3399
3400                 *nodep = NULL;
3401                 result = dns_rbt_findnode(search->rbtdb->tree, target, NULL,
3402                                           nodep, &search->chain,
3403                                           DNS_RBTFIND_NOOPTIONS, NULL, NULL);
3404                 if (result == ISC_R_SUCCESS)
3405                         return (result);
3406
3407                 /*
3408                  * There should always be a node in the main tree with the
3409                  * same name as the node in the auxiliary NSEC tree, except for
3410                  * nodes in the auxiliary tree that are awaiting deletion.
3411                  */
3412                 if (result != DNS_R_PARTIALMATCH && result != ISC_R_NOTFOUND) {
3413                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
3414                                       DNS_LOGMODULE_CACHE, ISC_LOG_ERROR,
3415                                       "previous_closest_nsec(): %s",
3416                                       isc_result_totext(result));
3417                         return (DNS_R_BADDB);
3418                 }
3419         }
3420 }
3421
3422 /*
3423  * Find the NSEC/NSEC3 which is or before the current point on the
3424  * search chain.  For NSEC3 records only NSEC3 records that match the
3425  * current NSEC3PARAM record are considered.
3426  */
3427 static inline isc_result_t
3428 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3429                   dns_name_t *foundname, dns_rdataset_t *rdataset,
3430                   dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3431                   dns_db_secure_t secure)
3432 {
3433         dns_rbtnode_t *node, *prevnode;
3434         rdatasetheader_t *header, *header_next, *found, *foundsig;
3435         dns_rbtnodechain_t nsecchain;
3436         isc_boolean_t empty_node;
3437         isc_result_t result;
3438         dns_fixedname_t fname, forigin;
3439         dns_name_t *name, *origin;
3440         dns_rdatatype_t type;
3441         rbtdb_rdatatype_t sigtype;
3442         isc_boolean_t wraps;
3443         isc_boolean_t first = ISC_TRUE;
3444         isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3445
3446         if (tree == search->rbtdb->nsec3) {
3447                 type = dns_rdatatype_nsec3;
3448                 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3449                 wraps = ISC_TRUE;
3450         } else {
3451                 type = dns_rdatatype_nsec;
3452                 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3453                 wraps = ISC_FALSE;
3454         }
3455
3456         /*
3457          * Use the auxiliary tree only starting with the second node in the
3458          * hope that the original node will be right much of the time.
3459          */
3460         dns_fixedname_init(&fname);
3461         name = dns_fixedname_name(&fname);
3462         dns_fixedname_init(&forigin);
3463         origin = dns_fixedname_name(&forigin);
3464  again:
3465         node = NULL;
3466         prevnode = NULL;
3467         result = dns_rbtnodechain_current(&search->chain, name, origin, &node);
3468         if (result != ISC_R_SUCCESS)
3469                 return (result);
3470         do {
3471                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3472                           isc_rwlocktype_read);
3473                 found = NULL;
3474                 foundsig = NULL;
3475                 empty_node = ISC_TRUE;
3476                 for (header = node->data;
3477                      header != NULL;
3478                      header = header_next) {
3479                         header_next = header->next;
3480                         /*
3481                          * Look for an active, extant NSEC or RRSIG NSEC.
3482                          */
3483                         do {
3484                                 if (header->serial <= search->serial &&
3485                                     !IGNORE(header)) {
3486                                         /*
3487                                          * Is this a "this rdataset doesn't
3488                                          * exist" record?
3489                                          */
3490                                         if (NONEXISTENT(header))
3491                                                 header = NULL;
3492                                         break;
3493                                 } else
3494                                         header = header->down;
3495                         } while (header != NULL);
3496                         if (header != NULL) {
3497                                 /*
3498                                  * We now know that there is at least one
3499                                  * active rdataset at this node.
3500                                  */
3501                                 empty_node = ISC_FALSE;
3502                                 if (header->type == type) {
3503                                         found = header;
3504                                         if (foundsig != NULL)
3505                                                 break;
3506                                 } else if (header->type == sigtype) {
3507                                         foundsig = header;
3508                                         if (found != NULL)
3509                                                 break;
3510                                 }
3511                         }
3512                 }
3513                 if (!empty_node) {
3514                         if (found != NULL && search->rbtversion->havensec3 &&
3515                             found->type == dns_rdatatype_nsec3 &&
3516                             !matchparams(found, search)) {
3517                                 empty_node = ISC_TRUE;
3518                                 found = NULL;
3519                                 foundsig = NULL;
3520                                 result = previous_closest_nsec(type, search,
3521                                                                name, origin,
3522                                                                &prevnode, NULL,
3523                                                                NULL);
3524                         } else if (found != NULL &&
3525                                    (foundsig != NULL || !need_sig)) {
3526                                 /*
3527                                  * We've found the right NSEC/NSEC3 record.
3528                                  *
3529                                  * Note: for this to really be the right
3530                                  * NSEC record, it's essential that the NSEC
3531                                  * records of any nodes obscured by a zone
3532                                  * cut have been removed; we assume this is
3533                                  * the case.
3534                                  */
3535                                 result = dns_name_concatenate(name, origin,
3536                                                               foundname, NULL);
3537                                 if (result == ISC_R_SUCCESS) {
3538                                         if (nodep != NULL) {
3539                                                 new_reference(search->rbtdb,
3540                                                               node);
3541                                                 *nodep = node;
3542                                         }
3543                                         bind_rdataset(search->rbtdb, node,
3544                                                       found, search->now,
3545                                                       rdataset);
3546                                         if (foundsig != NULL)
3547                                                 bind_rdataset(search->rbtdb,
3548                                                               node,
3549                                                               foundsig,
3550                                                               search->now,
3551                                                               sigrdataset);
3552                                 }
3553                         } else if (found == NULL && foundsig == NULL) {
3554                                 /*
3555                                  * This node is active, but has no NSEC or
3556                                  * RRSIG NSEC.  That means it's glue or
3557                                  * other obscured zone data that isn't
3558                                  * relevant for our search.  Treat the
3559                                  * node as if it were empty and keep looking.
3560                                  */
3561                                 empty_node = ISC_TRUE;
3562                                 result = previous_closest_nsec(type, search,
3563                                                                name, origin,
3564                                                                &prevnode,
3565                                                                &nsecchain,
3566                                                                &first);
3567                         } else {
3568                                 /*
3569                                  * We found an active node, but either the
3570                                  * NSEC or the RRSIG NSEC is missing.  This
3571                                  * shouldn't happen.
3572                                  */
3573                                 result = DNS_R_BADDB;
3574                         }
3575                 } else {
3576                         /*
3577                          * This node isn't active.  We've got to keep
3578                          * looking.
3579                          */
3580                         result = previous_closest_nsec(type, search,
3581                                                        name, origin, &prevnode,
3582                                                        &nsecchain, &first);
3583                 }
3584                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3585                             isc_rwlocktype_read);
3586                 node = prevnode;
3587                 prevnode = NULL;
3588         } while (empty_node && result == ISC_R_SUCCESS);
3589
3590         if (!first)
3591                 dns_rbtnodechain_invalidate(&nsecchain);
3592
3593         if (result == ISC_R_NOMORE && wraps) {
3594                 result = dns_rbtnodechain_last(&search->chain, tree,
3595                                                NULL, NULL);
3596                 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3597                         wraps = ISC_FALSE;
3598                         goto again;
3599                 }
3600         }
3601
3602         /*
3603          * If the result is ISC_R_NOMORE, then we got to the beginning of
3604          * the database and didn't find a NSEC record.  This shouldn't
3605          * happen.
3606          */
3607         if (result == ISC_R_NOMORE)
3608                 result = DNS_R_BADDB;
3609
3610         return (result);
3611 }
3612
/*
 * zone_find
 *
 * Look up 'name' / 'type' in the zone database 'db' as seen by 'version'.
 *
 * Arguments:
 *   db          - the database; must satisfy VALID_RBTDB().
 *   name        - the name to search for.
 *   version     - the version to search.  If NULL, the current version is
 *                 attached for the duration of the call and closed again
 *                 before returning.  A non-NULL version must belong to
 *                 'db' (checked with INSIST).
 *   type        - the rdata type wanted.  With dns_rdatatype_any every
 *                 active rdataset counts as a match, but nothing is bound
 *                 to 'rdataset' / 'sigrdataset' on the success path.
 *   options     - DNS_DBFIND_* flags.  The flags examined directly in this
 *                 function are FORCENSEC3 (search the nsec3 tree instead
 *                 of the main tree), FORCENSEC, GLUEOK and VALIDATEGLUE;
 *                 the full value is also stored in the search context.
 *   now         - ignored.
 *   nodep       - if non-NULL, receives a referenced node when an answer
 *                 or DNS_R_NXRRSET is produced here; on the delegation and
 *                 closest-NSEC paths it is simply handed on to
 *                 setup_delegation() / find_closest_nsec().
 *   foundname   - passed to dns_rbt_findnode(); overwritten with a copy of
 *                 'name' when a wildcard matched, in which case
 *                 DNS_NAMEATTR_WILDCARD is also set on it.
 *   rdataset,
 *   sigrdataset - bound (via bind_rdataset()) to the answer and its RRSIG,
 *                 or to the NSEC and its RRSIG on the DNS_R_NXRRSET path
 *                 when the version is secure without NSEC3 or FORCENSEC
 *                 was requested.
 *
 * Results produced in this function:
 *   ISC_R_SUCCESS    - ordinary answer, or NSEC/NSEC3/KEY found at the
 *                      zone cut node itself.
 *   DNS_R_CNAME      - a CNAME was found instead of the requested type.
 *   DNS_R_GLUE       - the answer lies at or beneath a zone cut.
 *   DNS_R_ZONECUT    - an ANY query answered at the zone cut node.
 *   DNS_R_NXRRSET    - the node has active data but not the wanted type.
 *   DNS_R_NXDOMAIN,
 *   DNS_R_EMPTYNAME  - no node with active data matched; EMPTYNAME when
 *                      activeempty() reports true.
 *   DNS_R_EMPTYWILD  - a wildcard matched without the wanted type, the
 *                      secure zone had no usable NSEC at that node, and
 *                      find_closest_nsec() succeeded.
 *   DNS_R_BADDB      - a required NSEC (or its signature) is missing.
 * Any other value is propagated unchanged from dns_rbt_findnode(),
 * find_wildcard(), dns_name_copy(), find_closest_nsec() or
 * setup_delegation().
 *
 * Locking: the database tree_lock is held for reading from the tree search
 * until tree_exit; the lock of the node being examined is held for reading
 * while its rdataset headers are scanned and bound.
 */
3613 static isc_result_t
3614 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3615           dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3616           dns_dbnode_t **nodep, dns_name_t *foundname,
3617           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3618 {
3619         dns_rbtnode_t *node = NULL;
3620         isc_result_t result;
3621         rbtdb_search_t search;
3622         isc_boolean_t cname_ok = ISC_TRUE;
3623         isc_boolean_t close_version = ISC_FALSE;
3624         isc_boolean_t maybe_zonecut = ISC_FALSE;
3625         isc_boolean_t at_zonecut = ISC_FALSE;
3626         isc_boolean_t wild;
3627         isc_boolean_t empty_node;
3628         rdatasetheader_t *header, *header_next, *found, *nsecheader;
3629         rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3630         rbtdb_rdatatype_t sigtype;
3631         isc_boolean_t active;
3632         dns_rbtnodechain_t chain;
3633         nodelock_t *lock;
3634         dns_rbt_t *tree;
3635
3636         search.rbtdb = (dns_rbtdb_t *)db;
3637
3638         REQUIRE(VALID_RBTDB(search.rbtdb));
3639         INSIST(version == NULL ||
3640                ((rbtdb_version_t *)version)->rbtdb == (dns_rbtdb_t *)db);
3641
3642         /*
3643          * We don't care about 'now'.
3644          */
3645         UNUSED(now);
3646
3647         /*
3648          * If the caller didn't supply a version, attach to the current
3649          * version.
3650          */
3651         if (version == NULL) {
3652                 currentversion(db, &version);
3653                 close_version = ISC_TRUE;
3654         }
3655
3656         search.rbtversion = version;
3657         search.serial = search.rbtversion->serial;
3658         search.options = options;
3659         search.copy_name = ISC_FALSE;
3660         search.need_cleanup = ISC_FALSE;
3661         search.wild = ISC_FALSE;
3662         search.zonecut = NULL;
3663         dns_fixedname_init(&search.zonecut_name);
3664         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3665         search.now = 0;
3666
3667         /*
3668          * 'wild' will be true iff. we've matched a wildcard.
3669          */
3670         wild = ISC_FALSE;
3671
3672         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3673
3674         /*
3675          * Search down from the root of the tree.  If, while going down, we
3676          * encounter a callback node, zone_zonecut_callback() will search the
3677          * rdatasets at the zone cut for active DNAME or NS rdatasets.
3678          */
3679         tree =  (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3680                                                          search.rbtdb->tree;
3681         result = dns_rbt_findnode(tree, name, foundname, &node,
3682                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
3683                                   zone_zonecut_callback, &search);
3684
3685         if (result == DNS_R_PARTIALMATCH) {
                     /*
                      * This label is also entered by goto from the rdataset
                      * scan below, when an exactly matching node turns out
                      * to have no usable data in the requested version.  The
                      * node lock has already been released on those paths.
                      */
3686         partial_match:
3687                 if (search.zonecut != NULL) {
3688                     result = setup_delegation(&search, nodep, foundname,
3689                                               rdataset, sigrdataset);
3690                     goto tree_exit;
3691                 }
3692
3693                 if (search.wild) {
3694                         /*
3695                          * At least one of the levels in the search chain
3696                          * potentially has a wildcard.  For each such level,
3697                          * we must see if there's a matching wildcard active
3698                          * in the current version.
3699                          */
3700                         result = find_wildcard(&search, &node, name);
3701                         if (result == ISC_R_SUCCESS) {
3702                                 result = dns_name_copy(name, foundname, NULL);
3703                                 if (result != ISC_R_SUCCESS)
3704                                         goto tree_exit;
3705                                 wild = ISC_TRUE;
3706                                 goto found;
3707                         }
3708                         else if (result != ISC_R_NOTFOUND)
3709                                 goto tree_exit;
3710                 }
3711
                     /*
                      * activeempty() is given its own copy of the chain;
                      * find_closest_nsec() below works on search.chain
                      * (presumably so the former cannot disturb the latter).
                      */
3712                 chain = search.chain;
3713                 active = activeempty(&search, &chain, name);
3714
3715                 /*
3716                  * If we're here, then the name does not exist, is not
3717                  * beneath a zonecut, and there's no matching wildcard.
3718                  */
3719                 if ((search.rbtversion->secure == dns_db_secure &&
3720                      !search.rbtversion->havensec3) ||
3721                     (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3722                     (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3723                 {
3724                         result = find_closest_nsec(&search, nodep, foundname,
3725                                                    rdataset, sigrdataset, tree,
3726                                                    search.rbtversion->secure);
3727                         if (result == ISC_R_SUCCESS)
3728                                 result = active ? DNS_R_EMPTYNAME :
3729                                                   DNS_R_NXDOMAIN;
3730                 } else
3731                         result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3732                 goto tree_exit;
3733         } else if (result != ISC_R_SUCCESS)
3734                 goto tree_exit;
3735
3736  found:
3737         /*
3738          * We have found a node whose name is the desired name, or we
3739          * have matched a wildcard.
3740          */
3741
3742         if (search.zonecut != NULL) {
3743                 /*
3744                  * If we're beneath a zone cut, we don't want to look for
3745                  * CNAMEs because they're not legitimate zone glue.
3746                  */
3747                 cname_ok = ISC_FALSE;
3748         } else {
3749                 /*
3750                  * The node may be a zone cut itself.  If it might be one,
3751                  * make sure we check for it later.
3752                  *
3753                  * DS records live above the zone cut in ordinary zones so
3754                  * we want to ignore any referral.
3755                  *
3756                  * Stub zones don't have anything "above" the delegation so
3757                  * we always return a referral.
3758                  */
3759                 if (node->find_callback &&
3760                     ((node != search.rbtdb->origin_node &&
3761                       !dns_rdatatype_atparent(type)) ||
3762                      IS_STUB(search.rbtdb)))
3763                         maybe_zonecut = ISC_TRUE;
3764         }
3765
3766         /*
3767          * Certain DNSSEC types are not subject to CNAME matching
3768          * (RFC4035, section 2.5 and RFC3007).
3769          *
3770          * We don't check for RRSIG, because we don't store RRSIG records
3771          * directly.
3772          */
3773         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3774                 cname_ok = ISC_FALSE;
3775
3776         /*
3777          * We now go looking for rdata...
3778          */
3779
3780         lock = &search.rbtdb->node_locks[node->locknum].lock;
3781         NODE_LOCK(lock, isc_rwlocktype_read);
3782
3783         found = NULL;
3784         foundsig = NULL;
3785         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3786         nsecheader = NULL;
3787         nsecsig = NULL;
3788         cnamesig = NULL;
3789         empty_node = ISC_TRUE;
3790         for (header = node->data; header != NULL; header = header_next) {
3791                 header_next = header->next;
3792                 /*
3793                  * Look for an active, extant rdataset.
3794                  */
3795                 do {
3796                         if (header->serial <= search.serial &&
3797                             !IGNORE(header)) {
3798                                 /*
3799                                  * Is this a "this rdataset doesn't
3800                                  * exist" record?
3801                                  */
3802                                 if (NONEXISTENT(header))
3803                                         header = NULL;
3804                                 break;
3805                         } else
3806                                 header = header->down;
3807                 } while (header != NULL);
3808                 if (header != NULL) {
3809                         /*
3810                          * We now know that there is at least one active
3811                          * rdataset at this node.
3812                          */
3813                         empty_node = ISC_FALSE;
3814
3815                         /*
3816                          * Do special zone cut handling, if requested.
3817                          */
3818                         if (maybe_zonecut &&
3819                             header->type == dns_rdatatype_ns) {
3820                                 /*
3821                                  * We increment the reference count on node to
3822                                  * ensure that search->zonecut_rdataset will
3823                                  * still be valid later.
3824                                  */
3825                                 new_reference(search.rbtdb, node);
3826                                 search.zonecut = node;
3827                                 search.zonecut_rdataset = header;
3828                                 search.zonecut_sigrdataset = NULL;
3829                                 search.need_cleanup = ISC_TRUE;
3830                                 maybe_zonecut = ISC_FALSE;
3831                                 at_zonecut = ISC_TRUE;
3832                                 /*
3833                                  * It is not clear if KEY should still be
3834                                  * allowed at the parent side of the zone
3835                                  * cut or not.  It is needed for RFC3007
3836                                  * validated updates.
3837                                  */
3838                                 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3839                                     && type != dns_rdatatype_nsec
3840                                     && type != dns_rdatatype_key) {
3841                                         /*
3842                                          * Glue is not OK, but any answer we
3843                                          * could return would be glue.  Return
3844                                          * the delegation.
3845                                          */
3846                                         found = NULL;
3847                                         break;
3848                                 }
3849                                 if (found != NULL && foundsig != NULL)
3850                                         break;
3851                         }
3852
3853
3854                         /*
3855                          * If the NSEC3 record doesn't match the chain
3856                          * we are using behave as if it isn't here.
3857                          */
3858                         if (header->type == dns_rdatatype_nsec3 &&
3859                            !matchparams(header, &search)) {
3860                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3861                                 goto partial_match;
3862                         }
3863                         /*
3864                          * If we found a type we were looking for,
3865                          * remember it.
3866                          */
3867                         if (header->type == type ||
3868                             type == dns_rdatatype_any ||
3869                             (header->type == dns_rdatatype_cname &&
3870                              cname_ok)) {
3871                                 /*
3872                                  * We've found the answer!
3873                                  */
3874                                 found = header;
3875                                 if (header->type == dns_rdatatype_cname &&
3876                                     cname_ok) {
3877                                         /*
3878                                          * We may be finding a CNAME instead
3879                                          * of the desired type.
3880                                          *
3881                                          * If we've already got the CNAME RRSIG,
3882                                          * use it, otherwise change sigtype
3883                                          * so that we find it.
3884                                          */
3885                                         if (cnamesig != NULL)
3886                                                 foundsig = cnamesig;
3887                                         else
3888                                                 sigtype =
3889                                                     RBTDB_RDATATYPE_SIGCNAME;
3890                                 }
3891                                 /*
3892                                  * If we've got all we need, end the search.
3893                                  */
3894                                 if (!maybe_zonecut && foundsig != NULL)
3895                                         break;
3896                         } else if (header->type == sigtype) {
3897                                 /*
3898                                  * We've found the RRSIG rdataset for our
3899                                  * target type.  Remember it.
3900                                  */
3901                                 foundsig = header;
3902                                 /*
3903                                  * If we've got all we need, end the search.
3904                                  */
3905                                 if (!maybe_zonecut && found != NULL)
3906                                         break;
3907                         } else if (header->type == dns_rdatatype_nsec &&
3908                                    !search.rbtversion->havensec3) {
3909                                 /*
3910                                  * Remember a NSEC rdataset even if we're
3911                                  * not specifically looking for it, because
3912                                  * we might need it later.
3913                                  */
3914                                 nsecheader = header;
3915                         } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3916                                    !search.rbtversion->havensec3) {
3917                                 /*
3918                                  * If we need the NSEC rdataset, we'll also
3919                                  * need its signature.
3920                                  */
3921                                 nsecsig = header;
3922                         } else if (cname_ok &&
3923                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
3924                                 /*
3925                                  * If we get a CNAME match, we'll also need
3926                                  * its signature.
3927                                  */
3928                                 cnamesig = header;
3929                         }
3930                 }
3931         }
3932
3933         if (empty_node) {
3934                 /*
3935                  * We have an exact match for the name, but there are no
3936                  * active rdatasets in the desired version.  That means that
3937                  * this node doesn't exist in the desired version, and that
3938                  * we really have a partial match.
3939                  */
3940                 if (!wild) {
3941                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3942                         goto partial_match;
3943                 }
3944         }
3945
3946         /*
3947          * If we didn't find what we were looking for...
3948          */
3949         if (found == NULL) {
3950                 if (search.zonecut != NULL) {
3951                         /*
3952                          * We were trying to find glue at a node beneath a
3953                          * zone cut, but didn't.
3954                          *
3955                          * Return the delegation.
3956                          */
3957                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3958                         result = setup_delegation(&search, nodep, foundname,
3959                                                   rdataset, sigrdataset);
3960                         goto tree_exit;
3961                 }
3962                 /*
3963                  * The desired type doesn't exist.
3964                  */
3965                 result = DNS_R_NXRRSET;
3966                 if (search.rbtversion->secure == dns_db_secure &&
3967                     !search.rbtversion->havensec3 &&
3968                     (nsecheader == NULL || nsecsig == NULL)) {
3969                         /*
3970                          * The zone is secure but there's no NSEC,
3971                          * or the NSEC has no signature!
3972                          */
3973                         if (!wild) {
3974                                 result = DNS_R_BADDB;
3975                                 goto node_exit;
3976                         }
3977
3978                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3979                         result = find_closest_nsec(&search, nodep, foundname,
3980                                                    rdataset, sigrdataset,
3981                                                    search.rbtdb->tree,
3982                                                    search.rbtversion->secure);
3983                         if (result == ISC_R_SUCCESS)
3984                                 result = DNS_R_EMPTYWILD;
3985                         goto tree_exit;
3986                 }
3987                 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3988                     nsecheader == NULL)
3989                 {
3990                         /*
3991                          * There's no NSEC record, and we were told
3992                          * to find one.
3993                          */
3994                         result = DNS_R_BADDB;
3995                         goto node_exit;
3996                 }
3997                 if (nodep != NULL) {
3998                         new_reference(search.rbtdb, node);
3999                         *nodep = node;
4000                 }
4001                 if ((search.rbtversion->secure == dns_db_secure &&
4002                      !search.rbtversion->havensec3) ||
4003                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
4004                 {
4005                         bind_rdataset(search.rbtdb, node, nsecheader,
4006                                       0, rdataset);
4007                         if (nsecsig != NULL)
4008                                 bind_rdataset(search.rbtdb, node,
4009                                               nsecsig, 0, sigrdataset);
4010                 }
4011                 if (wild)
4012                         foundname->attributes |= DNS_NAMEATTR_WILDCARD;
4013                 goto node_exit;
4014         }
4015
4016         /*
4017          * We found what we were looking for, or we found a CNAME.
4018          */
4019
4020         if (type != found->type &&
4021             type != dns_rdatatype_any &&
4022             found->type == dns_rdatatype_cname) {
4023                 /*
4024                  * We weren't doing an ANY query and we found a CNAME instead
4025                  * of the type we were looking for, so we need to indicate
4026                  * that result to the caller.
4027                  */
4028                 result = DNS_R_CNAME;
4029         } else if (search.zonecut != NULL) {
4030                 /*
4031                  * If we're beneath a zone cut, we must indicate that the
4032                  * result is glue, unless we're actually at the zone cut
4033                  * and the type is NSEC or KEY.
4034                  */
4035                 if (search.zonecut == node) {
4036                         /*
4037                          * It is not clear if KEY should still be
4038                          * allowed at the parent side of the zone
4039                          * cut or not.  It is needed for RFC3007
4040                          * validated updates.
4041                          */
4042                         if (type == dns_rdatatype_nsec ||
4043                             type == dns_rdatatype_nsec3 ||
4044                             type == dns_rdatatype_key)
4045                                 result = ISC_R_SUCCESS;
4046                         else if (type == dns_rdatatype_any)
4047                                 result = DNS_R_ZONECUT;
4048                         else
4049                                 result = DNS_R_GLUE;
4050                 } else
4051                         result = DNS_R_GLUE;
4052                 /*
4053                  * We might have found data that isn't glue, but was occluded
4054                  * by a dynamic update.  If the caller cares about this, they
4055                  * will have told us to validate glue.
4056                  *
4057                  * XXX We should cache the glue validity state!
4058                  */
4059                 if (result == DNS_R_GLUE &&
4060                     (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
4061                     !valid_glue(&search, foundname, type, node)) {
4062                         NODE_UNLOCK(lock, isc_rwlocktype_read);
4063                         result = setup_delegation(&search, nodep, foundname,
4064                                                   rdataset, sigrdataset);
4065                     goto tree_exit;
4066                 }
4067         } else {
4068                 /*
4069                  * An ordinary successful query!
4070                  */
4071                 result = ISC_R_SUCCESS;
4072         }
4073
             /*
              * If the answer node is the zone cut that was referenced during
              * the scan above (at_zonecut), that reference is handed to the
              * caller instead of taking a second one; clearing need_cleanup
              * keeps the code after tree_exit from releasing it.
              */
4074         if (nodep != NULL) {
4075                 if (!at_zonecut)
4076                         new_reference(search.rbtdb, node);
4077                 else
4078                         search.need_cleanup = ISC_FALSE;
4079                 *nodep = node;
4080         }
4081
4082         if (type != dns_rdatatype_any) {
4083                 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
4084                 if (foundsig != NULL)
4085                         bind_rdataset(search.rbtdb, node, foundsig, 0,
4086                                       sigrdataset);
4087         }
4088
4089         if (wild)
4090                 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
4091
4092  node_exit:
4093         NODE_UNLOCK(lock, isc_rwlocktype_read);
4094
4095  tree_exit:
4096         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4097
4098         /*
4099          * If we found a zonecut but aren't going to use it, we have to
4100          * let go of it.
4101          */
4102         if (search.need_cleanup) {
4103                 node = search.zonecut;
4104                 INSIST(node != NULL);
4105                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4106
4107                 NODE_LOCK(lock, isc_rwlocktype_read);
4108                 decrement_reference(search.rbtdb, node, 0,
4109                                     isc_rwlocktype_read, isc_rwlocktype_none,
4110                                     ISC_FALSE);
4111                 NODE_UNLOCK(lock, isc_rwlocktype_read);
4112         }
4113
4114         if (close_version)
4115                 closeversion(db, &version, ISC_FALSE);
4116
4117         dns_rbtnodechain_reset(&search.chain);
4118
4119         return (result);
4120 }
4121
4122 static isc_result_t
4123 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4124                  isc_stdtime_t now, dns_dbnode_t **nodep,
4125                  dns_name_t *foundname,
4126                  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4127 {
4128         UNUSED(db);
4129         UNUSED(name);
4130         UNUSED(options);
4131         UNUSED(now);
4132         UNUSED(nodep);
4133         UNUSED(foundname);
4134         UNUSED(rdataset);
4135         UNUSED(sigrdataset);
4136
4137         FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
4138
4139         /* NOTREACHED */
4140         return (ISC_R_NOTIMPLEMENTED);
4141 }
4142
4143 static isc_result_t
4144 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
4145         rbtdb_search_t *search = arg;
4146         rdatasetheader_t *header, *header_prev, *header_next;
4147         rdatasetheader_t *dname_header, *sigdname_header;
4148         isc_result_t result;
4149         nodelock_t *lock;
4150         isc_rwlocktype_t locktype;
4151
4152         /* XXX comment */
4153
4154         REQUIRE(search->zonecut == NULL);
4155
4156         /*
4157          * Keep compiler silent.
4158          */
4159         UNUSED(name);
4160
4161         lock = &(search->rbtdb->node_locks[node->locknum].lock);
4162         locktype = isc_rwlocktype_read;
4163         NODE_LOCK(lock, locktype);
4164
4165         /*
4166          * Look for a DNAME or RRSIG DNAME rdataset.
4167          */
4168         dname_header = NULL;
4169         sigdname_header = NULL;
4170         header_prev = NULL;
4171         for (header = node->data; header != NULL; header = header_next) {
4172                 header_next = header->next;
4173                 if (header->rdh_ttl <  search->now) {
4174                         /*
4175                          * This rdataset is stale.  If no one else is
4176                          * using the node, we can clean it up right
4177                          * now, otherwise we mark it as stale, and
4178                          * the node as dirty, so it will get cleaned
4179                          * up later.
4180                          */
4181                         if ((header->rdh_ttl <  search->now - RBTDB_VIRTUAL) &&
4182                             (locktype == isc_rwlocktype_write ||
4183                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4184                                 /*
4185                                  * We update the node's status only when we
4186                                  * can get write access; otherwise, we leave
4187                                  * others to this work.  Periodical cleaning
4188                                  * will eventually take the job as the last
4189                                  * resort.
4190                                  * We won't downgrade the lock, since other
4191                                  * rdatasets are probably stale, too.
4192                                  */
4193                                 locktype = isc_rwlocktype_write;
4194
4195                                 if (dns_rbtnode_refcurrent(node) == 0) {
4196                                         isc_mem_t *mctx;
4197
4198                                         /*
4199                                          * header->down can be non-NULL if the
4200                                          * refcount has just decremented to 0
4201                                          * but decrement_reference() has not
4202                                          * performed clean_cache_node(), in
4203                                          * which case we need to purge the
4204                                          * stale headers first.
4205                                          */
4206                                         mctx = search->rbtdb->common.mctx;
4207                                         clean_stale_headers(search->rbtdb,
4208                                                             mctx,
4209                                                             header);
4210                                         if (header_prev != NULL)
4211                                                 header_prev->next =
4212                                                         header->next;
4213                                         else
4214                                                 node->data = header->next;
4215                                         free_rdataset(search->rbtdb, mctx,
4216                                                       header);
4217                                 } else {
4218                                         header->attributes |=
4219                                                 RDATASET_ATTR_STALE;
4220                                         node->dirty = 1;
4221                                         header_prev = header;
4222                                 }
4223                         } else
4224                                 header_prev = header;
4225                 } else if (header->type == dns_rdatatype_dname &&
4226                            EXISTS(header)) {
4227                         dname_header = header;
4228                         header_prev = header;
4229                 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
4230                          EXISTS(header)) {
4231                         sigdname_header = header;
4232                         header_prev = header;
4233                 } else
4234                         header_prev = header;
4235         }
4236
4237         if (dname_header != NULL &&
4238             (!DNS_TRUST_PENDING(dname_header->trust) ||
4239              (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4240                 /*
4241                  * We increment the reference count on node to ensure that
4242                  * search->zonecut_rdataset will still be valid later.
4243                  */
4244                 new_reference(search->rbtdb, node);
4245                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4246                 search->zonecut = node;
4247                 search->zonecut_rdataset = dname_header;
4248                 search->zonecut_sigrdataset = sigdname_header;
4249                 search->need_cleanup = ISC_TRUE;
4250                 result = DNS_R_PARTIALMATCH;
4251         } else
4252                 result = DNS_R_CONTINUE;
4253
4254         NODE_UNLOCK(lock, locktype);
4255
4256         return (result);
4257 }
4258
4259 static inline isc_result_t
4260 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4261                      dns_dbnode_t **nodep, dns_name_t *foundname,
4262                      dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4263 {
4264         unsigned int i;
4265         dns_rbtnode_t *level_node;
4266         rdatasetheader_t *header, *header_prev, *header_next;
4267         rdatasetheader_t *found, *foundsig;
4268         isc_result_t result = ISC_R_NOTFOUND;
4269         dns_name_t name;
4270         dns_rbtdb_t *rbtdb;
4271         isc_boolean_t done;
4272         nodelock_t *lock;
4273         isc_rwlocktype_t locktype;
4274
4275         /*
4276          * Caller must be holding the tree lock.
4277          */
4278
4279         rbtdb = search->rbtdb;
4280         i = search->chain.level_matches;
4281         done = ISC_FALSE;
4282         do {
4283                 locktype = isc_rwlocktype_read;
4284                 lock = &rbtdb->node_locks[node->locknum].lock;
4285                 NODE_LOCK(lock, locktype);
4286
4287                 /*
4288                  * Look for NS and RRSIG NS rdatasets.
4289                  */
4290                 found = NULL;
4291                 foundsig = NULL;
4292                 header_prev = NULL;
4293                 for (header = node->data;
4294                      header != NULL;
4295                      header = header_next) {
4296                         header_next = header->next;
4297                         if (header->rdh_ttl <  search->now) {
4298                                 /*
4299                                  * This rdataset is stale.  If no one else is
4300                                  * using the node, we can clean it up right
4301                                  * now, otherwise we mark it as stale, and
4302                                  * the node as dirty, so it will get cleaned
4303                                  * up later.
4304                                  */
4305                                 if ((header->rdh_ttl <  search->now -
4306                                                     RBTDB_VIRTUAL) &&
4307                                     (locktype == isc_rwlocktype_write ||
4308                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4309                                         /*
4310                                          * We update the node's status only
4311                                          * when we can get write access.
4312                                          */
4313                                         locktype = isc_rwlocktype_write;
4314
4315                                         if (dns_rbtnode_refcurrent(node)
4316                                             == 0) {
4317                                                 isc_mem_t *m;
4318
4319                                                 m = search->rbtdb->common.mctx;
4320                                                 clean_stale_headers(
4321                                                         search->rbtdb,
4322                                                         m, header);
4323                                                 if (header_prev != NULL)
4324                                                         header_prev->next =
4325                                                                 header->next;
4326                                                 else
4327                                                         node->data =
4328                                                                 header->next;
4329                                                 free_rdataset(rbtdb, m,
4330                                                               header);
4331                                         } else {
4332                                                 header->attributes |=
4333                                                         RDATASET_ATTR_STALE;
4334                                                 node->dirty = 1;
4335                                                 header_prev = header;
4336                                         }
4337                                 } else
4338                                         header_prev = header;
4339                         } else if (EXISTS(header)) {
4340                                 /*
4341                                  * We've found an extant rdataset.  See if
4342                                  * we're interested in it.
4343                                  */
4344                                 if (header->type == dns_rdatatype_ns) {
4345                                         found = header;
4346                                         if (foundsig != NULL)
4347                                                 break;
4348                                 } else if (header->type ==
4349                                            RBTDB_RDATATYPE_SIGNS) {
4350                                         foundsig = header;
4351                                         if (found != NULL)
4352                                                 break;
4353                                 }
4354                                 header_prev = header;
4355                         } else
4356                                 header_prev = header;
4357                 }
4358
4359                 if (found != NULL) {
4360                         /*
4361                          * If we have to set foundname, we do it before
4362                          * anything else.  If we were to set foundname after
4363                          * we had set nodep or bound the rdataset, then we'd
4364                          * have to undo that work if dns_name_concatenate()
4365                          * failed.  By setting foundname first, there's
4366                          * nothing to undo if we have trouble.
4367                          */
4368                         if (foundname != NULL) {
4369                                 dns_name_init(&name, NULL);
4370                                 dns_rbt_namefromnode(node, &name);
4371                                 result = dns_name_copy(&name, foundname, NULL);
4372                                 while (result == ISC_R_SUCCESS && i > 0) {
4373                                         i--;
4374                                         level_node = search->chain.levels[i];
4375                                         dns_name_init(&name, NULL);
4376                                         dns_rbt_namefromnode(level_node,
4377                                                              &name);
4378                                         result =
4379                                                 dns_name_concatenate(foundname,
4380                                                                      &name,
4381                                                                      foundname,
4382                                                                      NULL);
4383                                 }
4384                                 if (result != ISC_R_SUCCESS) {
4385                                         *nodep = NULL;
4386                                         goto node_exit;
4387                                 }
4388                         }
4389                         result = DNS_R_DELEGATION;
4390                         if (nodep != NULL) {
4391                                 new_reference(search->rbtdb, node);
4392                                 *nodep = node;
4393                         }
4394                         bind_rdataset(search->rbtdb, node, found, search->now,
4395                                       rdataset);
4396                         if (foundsig != NULL)
4397                                 bind_rdataset(search->rbtdb, node, foundsig,
4398                                               search->now, sigrdataset);
4399                         if (need_headerupdate(found, search->now) ||
4400                             (foundsig != NULL &&
4401                              need_headerupdate(foundsig, search->now))) {
4402                                 if (locktype != isc_rwlocktype_write) {
4403                                         NODE_UNLOCK(lock, locktype);
4404                                         NODE_LOCK(lock, isc_rwlocktype_write);
4405                                         locktype = isc_rwlocktype_write;
4406                                         POST(locktype);
4407                                 }
4408                                 if (need_headerupdate(found, search->now))
4409                                         update_header(search->rbtdb, found,
4410                                                       search->now);
4411                                 if (foundsig != NULL &&
4412                                     need_headerupdate(foundsig, search->now)) {
4413                                         update_header(search->rbtdb, foundsig,
4414                                                       search->now);
4415                                 }
4416                         }
4417                 }
4418
4419         node_exit:
4420                 NODE_UNLOCK(lock, locktype);
4421
4422                 if (found == NULL && i > 0) {
4423                         i--;
4424                         node = search->chain.levels[i];
4425                 } else
4426                         done = ISC_TRUE;
4427
4428         } while (!done);
4429
4430         return (result);
4431 }
4432
4433 static isc_result_t
4434 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4435                   isc_stdtime_t now, dns_name_t *foundname,
4436                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4437 {
4438         dns_rbtnode_t *node;
4439         rdatasetheader_t *header, *header_next, *header_prev;
4440         rdatasetheader_t *found, *foundsig;
4441         isc_boolean_t empty_node;
4442         isc_result_t result;
4443         dns_fixedname_t fname, forigin;
4444         dns_name_t *name, *origin;
4445         rbtdb_rdatatype_t matchtype, sigmatchtype;
4446         nodelock_t *lock;
4447         isc_rwlocktype_t locktype;
4448
4449         matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4450         sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4451                                              dns_rdatatype_nsec);
4452
4453         do {
4454                 node = NULL;
4455                 dns_fixedname_init(&fname);
4456                 name = dns_fixedname_name(&fname);
4457                 dns_fixedname_init(&forigin);
4458                 origin = dns_fixedname_name(&forigin);
4459                 result = dns_rbtnodechain_current(&search->chain, name,
4460                                                   origin, &node);
4461                 if (result != ISC_R_SUCCESS)
4462                         return (result);
4463                 locktype = isc_rwlocktype_read;
4464                 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4465                 NODE_LOCK(lock, locktype);
4466                 found = NULL;
4467                 foundsig = NULL;
4468                 empty_node = ISC_TRUE;
4469                 header_prev = NULL;
4470                 for (header = node->data;
4471                      header != NULL;
4472                      header = header_next) {
4473                         header_next = header->next;
4474                         if (header->rdh_ttl <  now) {
4475                                 /*
4476                                  * This rdataset is stale.  If no one else is
4477                                  * using the node, we can clean it up right
4478                                  * now, otherwise we mark it as stale, and the
4479                                  * node as dirty, so it will get cleaned up
4480                                  * later.
4481                                  */
4482                                 if ((header->rdh_ttl <  now - RBTDB_VIRTUAL) &&
4483                                     (locktype == isc_rwlocktype_write ||
4484                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4485                                         /*
4486                                          * We update the node's status only
4487                                          * when we can get write access.
4488                                          */
4489                                         locktype = isc_rwlocktype_write;
4490
4491                                         if (dns_rbtnode_refcurrent(node)
4492                                             == 0) {
4493                                                 isc_mem_t *m;
4494
4495                                                 m = search->rbtdb->common.mctx;
4496                                                 clean_stale_headers(
4497                                                         search->rbtdb,
4498                                                         m, header);
4499                                                 if (header_prev != NULL)
4500                                                         header_prev->next =
4501                                                                 header->next;
4502                                                 else
4503                                                         node->data = header->next;
4504                                                 free_rdataset(search->rbtdb, m,
4505                                                               header);
4506                                         } else {
4507                                                 header->attributes |=
4508                                                         RDATASET_ATTR_STALE;
4509                                                 node->dirty = 1;
4510                                                 header_prev = header;
4511                                         }
4512                                 } else
4513                                         header_prev = header;
4514                                 continue;
4515                         }
4516                         if (NONEXISTENT(header) ||
4517                             RBTDB_RDATATYPE_BASE(header->type) == 0) {
4518                                 header_prev = header;
4519                                 continue;
4520                         }
4521                         empty_node = ISC_FALSE;
4522                         if (header->type == matchtype)
4523                                 found = header;
4524                         else if (header->type == sigmatchtype)
4525                                 foundsig = header;
4526                         header_prev = header;
4527                 }
4528                 if (found != NULL) {
4529                         result = dns_name_concatenate(name, origin,
4530                                                       foundname, NULL);
4531                         if (result != ISC_R_SUCCESS)
4532                                 goto unlock_node;
4533                         bind_rdataset(search->rbtdb, node, found,
4534                                       now, rdataset);
4535                         if (foundsig != NULL)
4536                                 bind_rdataset(search->rbtdb, node, foundsig,
4537                                               now, sigrdataset);
4538                         new_reference(search->rbtdb, node);
4539                         *nodep = node;
4540                         result = DNS_R_COVERINGNSEC;
4541                 } else if (!empty_node) {
4542                         result = ISC_R_NOTFOUND;
4543                 } else
4544                         result = dns_rbtnodechain_prev(&search->chain, NULL,
4545                                                        NULL);
4546  unlock_node:
4547                 NODE_UNLOCK(lock, locktype);
4548         } while (empty_node && result == ISC_R_SUCCESS);
4549         return (result);
4550 }
4551
4552 /*
4553  * Mark a database for response policy rewriting
4554  * or find which RPZ data is available.
4555  */
4556 #ifdef BIND9
4557 static isc_result_t
4558 rpz_enabled(dns_db_t *db, dns_rpz_st_t *st)
4559 {
4560         dns_rbtdb_t *rbtdb;
4561         isc_result_t result;
4562
4563         result = ISC_R_SUCCESS;
4564         rbtdb = (dns_rbtdb_t *)db;
4565         REQUIRE(VALID_RBTDB(rbtdb));
4566         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4567         if (st != NULL) {
4568                 dns_rpz_enabled_get(rbtdb->rpz_cidr, st);
4569         } else {
4570                 result = dns_rpz_new_cidr(rbtdb->common.mctx,
4571                                           &rbtdb->common.origin,
4572                                           &rbtdb->rpz_cidr);
4573         }
4574         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4575         return (result);
4576 }
4577
4578 /*
4579  * Search the CDIR block tree of a response policy tree of trees for all of
4580  * the IP addresses in an A or AAAA rdataset.
4581  * Among the policies for all IPv4 and IPv6 addresses for a name, choose
4582  *      the earliest configured policy,
4583  *      QNAME over IP over NSDNAME over NSIP,
4584  *      the longest prefix,
4585  *      the lexically smallest address.
4586  * The caller must have already checked that any existing policy was not
4587  * configured earlier than this policy zone and does not have a higher
4588  * precedence type.
4589  */
4590 static void
4591 rpz_findips(dns_rpz_zone_t *rpz, dns_rpz_type_t rpz_type,
4592             dns_zone_t *zone, dns_db_t *db, dns_dbversion_t *version,
4593             dns_rdataset_t *ardataset, dns_rpz_st_t *st,
4594             dns_name_t *query_qname)
4595 {
4596         dns_rbtdb_t *rbtdb;
4597         struct in_addr ina;
4598         struct in6_addr in6a;
4599         isc_netaddr_t netaddr;
4600         dns_fixedname_t selfnamef, qnamef;
4601         dns_name_t *selfname, *qname;
4602         dns_rbtnode_t *node;
4603         dns_rdataset_t zrdataset;
4604         dns_rpz_cidr_bits_t prefix;
4605         isc_result_t result;
4606         dns_rpz_policy_t rpz_policy;
4607         dns_ttl_t ttl;
4608
4609         rbtdb = (dns_rbtdb_t *)db;
4610         REQUIRE(VALID_RBTDB(rbtdb));
4611         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4612
4613         if (rbtdb->rpz_cidr == NULL) {
4614                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4615                 return;
4616         }
4617
4618         dns_fixedname_init(&selfnamef);
4619         dns_fixedname_init(&qnamef);
4620         selfname = dns_fixedname_name(&selfnamef);
4621         qname = dns_fixedname_name(&qnamef);
4622
4623         for (result = dns_rdataset_first(ardataset);
4624              result == ISC_R_SUCCESS;
4625              result = dns_rdataset_next(ardataset)) {
4626                 dns_rdata_t rdata = DNS_RDATA_INIT;
4627                 dns_rdataset_current(ardataset, &rdata);
4628                 switch (rdata.type) {
4629                 case dns_rdatatype_a:
4630                         INSIST(rdata.length == 4);
4631                         memmove(&ina.s_addr, rdata.data, 4);
4632                         isc_netaddr_fromin(&netaddr, &ina);
4633                         break;
4634                 case dns_rdatatype_aaaa:
4635                         INSIST(rdata.length == 16);
4636                         memmove(in6a.s6_addr, rdata.data, 16);
4637                         isc_netaddr_fromin6(&netaddr, &in6a);
4638                         break;
4639                 default:
4640                         continue;
4641                 }
4642
4643                 result = dns_rpz_cidr_find(rbtdb->rpz_cidr, &netaddr, rpz_type,
4644                                            selfname, qname, &prefix);
4645                 if (result != ISC_R_SUCCESS)
4646                         continue;
4647
4648                 /*
4649                  * If we already have a rule, discard this new rule if
4650                  * is not better.
4651                  * The caller has checked that st->m.rpz->num > rpz->num
4652                  * or st->m.rpz->num == rpz->num and st->m.type >= rpz_type
4653                  */
4654                 if (st->m.policy != DNS_RPZ_POLICY_MISS &&
4655                     st->m.rpz->num == rpz->num &&
4656                     (st->m.type < rpz_type ||
4657                      (st->m.type == rpz_type &&
4658                       (st->m.prefix > prefix ||
4659                        (st->m.prefix == prefix &&
4660                         0 > dns_name_rdatacompare(st->qname, qname))))))
4661                         continue;
4662
4663                 /*
4664                  * We have rpz_st an entry with a prefix at least as long as
4665                  * the prefix of the entry we had before.  Find the node
4666                  * corresponding to CDIR tree entry.
4667                  */
4668                 node = NULL;
4669                 result = dns_rbt_findnode(rbtdb->tree, qname, NULL,
4670                                           &node, NULL, 0, NULL, NULL);
4671                 if (result != ISC_R_SUCCESS) {
4672                         char namebuf[DNS_NAME_FORMATSIZE];
4673
4674                         dns_name_format(qname, namebuf, sizeof(namebuf));
4675                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_RPZ,
4676                                       DNS_LOGMODULE_RBTDB, DNS_RPZ_ERROR_LEVEL,
4677                                       "rpz_findips findnode(%s) failed: %s",
4678                                       namebuf, isc_result_totext(result));
4679                         continue;
4680                 }
4681                 /*
4682                  * First look for a simple rewrite of the IP address.
4683                  * If that fails, look for a CNAME.  If we cannot find
4684                  * a CNAME or the CNAME is neither of the special forms
4685                  * "*" or ".", treat it like a real CNAME.
4686                  */
4687                 dns_rdataset_init(&zrdataset);
4688                 result = dns_db_findrdataset(db, node, version, ardataset->type,
4689                                              0, 0, &zrdataset, NULL);
4690                 if (result != ISC_R_SUCCESS)
4691                         result = dns_db_findrdataset(db, node, version,
4692                                                      dns_rdatatype_cname,
4693                                                      0, 0, &zrdataset, NULL);
4694                 if (result == ISC_R_SUCCESS) {
4695                         if (zrdataset.type != dns_rdatatype_cname) {
4696                                 rpz_policy = DNS_RPZ_POLICY_RECORD;
4697                         } else {
4698                                 rpz_policy = dns_rpz_decode_cname(rpz,
4699                                                                   &zrdataset,
4700                                                                   selfname);
4701                                 if (rpz_policy == DNS_RPZ_POLICY_RECORD ||
4702                                     rpz_policy == DNS_RPZ_POLICY_WILDCNAME)
4703                                         result = DNS_R_CNAME;
4704                         }
4705                         ttl = zrdataset.ttl;
4706                 } else {
4707                         rpz_policy = DNS_RPZ_POLICY_RECORD;
4708                         result = DNS_R_NXRRSET;
4709                         ttl = DNS_RPZ_TTL_DEFAULT;
4710                 }
4711
4712                 /*
4713                  * Use an overriding action specified in the configuration file
4714                  */
4715                 if (rpz->policy != DNS_RPZ_POLICY_GIVEN) {
4716                         /*
4717                          * only log DNS_RPZ_POLICY_DISABLED hits
4718                          */
4719                         if (rpz->policy == DNS_RPZ_POLICY_DISABLED) {
4720                                 if (isc_log_wouldlog(dns_lctx,
4721                                                      DNS_RPZ_INFO_LEVEL)) {
4722                                         char qname_buf[DNS_NAME_FORMATSIZE];
4723                                         char rpz_qname_buf[DNS_NAME_FORMATSIZE];
4724                                         dns_name_format(query_qname, qname_buf,
4725                                                         sizeof(qname_buf));
4726                                         dns_name_format(qname, rpz_qname_buf,
4727                                                         sizeof(rpz_qname_buf));
4728
4729                                         isc_log_write(dns_lctx,
4730                                                 DNS_LOGCATEGORY_RPZ,
4731                                                 DNS_LOGMODULE_RBTDB,
4732                                                 DNS_RPZ_INFO_LEVEL,
4733                                                 "disabled rpz %s %s rewrite"
4734                                                 " %s via %s",
4735                                                 dns_rpz_type2str(rpz_type),
4736                                                 dns_rpz_policy2str(rpz_policy),
4737                                                 qname_buf, rpz_qname_buf);
4738                                 }
4739                                 continue;
4740                         }
4741
4742                         rpz_policy = rpz->policy;
4743                 }
4744
4745                 if (dns_rdataset_isassociated(st->m.rdataset))
4746                         dns_rdataset_disassociate(st->m.rdataset);
4747                 if (st->m.node != NULL)
4748                         dns_db_detachnode(st->m.db, &st->m.node);
4749                 if (st->m.db != NULL)
4750                         dns_db_detach(&st->m.db);
4751                 if (st->m.zone != NULL)
4752                         dns_zone_detach(&st->m.zone);
4753                 st->m.rpz = rpz;
4754                 st->m.type = rpz_type;
4755                 st->m.prefix = prefix;
4756                 st->m.policy = rpz_policy;
4757                 st->m.ttl = ISC_MIN(ttl, rpz->max_policy_ttl);
4758                 st->m.result = result;
4759                 dns_name_copy(qname, st->qname, NULL);
4760                 if ((rpz_policy == DNS_RPZ_POLICY_RECORD ||
4761                     rpz_policy == DNS_RPZ_POLICY_WILDCNAME) &&
4762                     result != DNS_R_NXRRSET) {
4763                         dns_rdataset_clone(&zrdataset,st->m.rdataset);
4764                         dns_db_attachnode(db, node, &st->m.node);
4765                 }
4766                 dns_db_attach(db, &st->m.db);
4767                 st->m.version = version;
4768                 dns_zone_attach(zone, &st->m.zone);
4769                 if (dns_rdataset_isassociated(&zrdataset))
4770                         dns_rdataset_disassociate(&zrdataset);
4771         }
4772
4773         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4774 }
4775 #endif
4776
4777 static isc_result_t
4778 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4779            dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4780            dns_dbnode_t **nodep, dns_name_t *foundname,
4781            dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4782 {
4783         dns_rbtnode_t *node = NULL;
4784         isc_result_t result;
4785         rbtdb_search_t search;
4786         isc_boolean_t cname_ok = ISC_TRUE;
4787         isc_boolean_t empty_node;
4788         nodelock_t *lock;
4789         isc_rwlocktype_t locktype;
4790         rdatasetheader_t *header, *header_prev, *header_next;
4791         rdatasetheader_t *found, *nsheader;
4792         rdatasetheader_t *foundsig, *nssig, *cnamesig;
4793         rdatasetheader_t *update, *updatesig;
4794         rbtdb_rdatatype_t sigtype, negtype;
4795
4796         UNUSED(version);
4797
4798         search.rbtdb = (dns_rbtdb_t *)db;
4799
4800         REQUIRE(VALID_RBTDB(search.rbtdb));
4801         REQUIRE(version == NULL);
4802
4803         if (now == 0)
4804                 isc_stdtime_get(&now);
4805
4806         search.rbtversion = NULL;
4807         search.serial = 1;
4808         search.options = options;
4809         search.copy_name = ISC_FALSE;
4810         search.need_cleanup = ISC_FALSE;
4811         search.wild = ISC_FALSE;
4812         search.zonecut = NULL;
4813         dns_fixedname_init(&search.zonecut_name);
4814         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4815         search.now = now;
4816         update = NULL;
4817         updatesig = NULL;
4818
4819         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4820
4821         /*
4822          * Search down from the root of the tree.  If, while going down, we
4823          * encounter a callback node, cache_zonecut_callback() will search the
4824          * rdatasets at the zone cut for a DNAME rdataset.
4825          */
4826         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4827                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
4828                                   cache_zonecut_callback, &search);
4829
4830         if (result == DNS_R_PARTIALMATCH) {
4831                 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4832                         result = find_coveringnsec(&search, nodep, now,
4833                                                    foundname, rdataset,
4834                                                    sigrdataset);
4835                         if (result == DNS_R_COVERINGNSEC)
4836                                 goto tree_exit;
4837                 }
4838                 if (search.zonecut != NULL) {
4839                     result = setup_delegation(&search, nodep, foundname,
4840                                               rdataset, sigrdataset);
4841                     goto tree_exit;
4842                 } else {
4843                 find_ns:
4844                         result = find_deepest_zonecut(&search, node, nodep,
4845                                                       foundname, rdataset,
4846                                                       sigrdataset);
4847                         goto tree_exit;
4848                 }
4849         } else if (result != ISC_R_SUCCESS)
4850                 goto tree_exit;
4851
4852         /*
4853          * Certain DNSSEC types are not subject to CNAME matching
4854          * (RFC4035, section 2.5 and RFC3007).
4855          *
4856          * We don't check for RRSIG, because we don't store RRSIG records
4857          * directly.
4858          */
4859         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4860                 cname_ok = ISC_FALSE;
4861
4862         /*
4863          * We now go looking for rdata...
4864          */
4865
4866         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4867         locktype = isc_rwlocktype_read;
4868         NODE_LOCK(lock, locktype);
4869
4870         found = NULL;
4871         foundsig = NULL;
4872         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4873         negtype = RBTDB_RDATATYPE_VALUE(0, type);
4874         nsheader = NULL;
4875         nssig = NULL;
4876         cnamesig = NULL;
4877         empty_node = ISC_TRUE;
4878         header_prev = NULL;
4879         for (header = node->data; header != NULL; header = header_next) {
4880                 header_next = header->next;
4881                 if (header->rdh_ttl <  now) {
4882                         /*
4883                          * This rdataset is stale.  If no one else is using the
4884                          * node, we can clean it up right now, otherwise we
4885                          * mark it as stale, and the node as dirty, so it will
4886                          * get cleaned up later.
4887                          */
4888                         if ((header->rdh_ttl <  now - RBTDB_VIRTUAL) &&
4889                             (locktype == isc_rwlocktype_write ||
4890                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4891                                 /*
4892                                  * We update the node's status only when we
4893                                  * can get write access.
4894                                  */
4895                                 locktype = isc_rwlocktype_write;
4896
4897                                 if (dns_rbtnode_refcurrent(node) == 0) {
4898                                         isc_mem_t *mctx;
4899
4900                                         mctx = search.rbtdb->common.mctx;
4901                                         clean_stale_headers(search.rbtdb, mctx,
4902                                                             header);
4903                                         if (header_prev != NULL)
4904                                                 header_prev->next =
4905                                                         header->next;
4906                                         else
4907                                                 node->data = header->next;
4908                                         free_rdataset(search.rbtdb, mctx,
4909                                                       header);
4910                                 } else {
4911                                         header->attributes |=
4912                                                 RDATASET_ATTR_STALE;
4913                                         node->dirty = 1;
4914                                         header_prev = header;
4915                                 }
4916                         } else
4917                                 header_prev = header;
4918                 } else if (EXISTS(header)) {
4919                         /*
4920                          * We now know that there is at least one active
4921                          * non-stale rdataset at this node.
4922                          */
4923                         empty_node = ISC_FALSE;
4924
4925                         /*
4926                          * If we found a type we were looking for, remember
4927                          * it.
4928                          */
4929                         if (header->type == type ||
4930                             (type == dns_rdatatype_any &&
4931                              RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4932                             (cname_ok && header->type ==
4933                              dns_rdatatype_cname)) {
4934                                 /*
4935                                  * We've found the answer.
4936                                  */
4937                                 found = header;
4938                                 if (header->type == dns_rdatatype_cname &&
4939                                     cname_ok &&
4940                                     cnamesig != NULL) {
4941                                         /*
4942                                          * If we've already got the
4943                                          * CNAME RRSIG, use it.
4944                                          */
4945                                         foundsig = cnamesig;
4946                                 }
4947                         } else if (header->type == sigtype) {
4948                                 /*
4949                                  * We've found the RRSIG rdataset for our
4950                                  * target type.  Remember it.
4951                                  */
4952                                 foundsig = header;
4953                         } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4954                                    header->type == negtype) {
4955                                 /*
4956                                  * We've found a negative cache entry.
4957                                  */
4958                                 found = header;
4959                         } else if (header->type == dns_rdatatype_ns) {
4960                                 /*
4961                                  * Remember a NS rdataset even if we're
4962                                  * not specifically looking for it, because
4963                                  * we might need it later.
4964                                  */
4965                                 nsheader = header;
4966                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4967                                 /*
4968                                  * If we need the NS rdataset, we'll also
4969                                  * need its signature.
4970                                  */
4971                                 nssig = header;
4972                         } else if (cname_ok &&
4973                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
4974                                 /*
4975                                  * If we get a CNAME match, we'll also need
4976                                  * its signature.
4977                                  */
4978                                 cnamesig = header;
4979                         }
4980                         header_prev = header;
4981                 } else
4982                         header_prev = header;
4983         }
4984
4985         if (empty_node) {
4986                 /*
4987                  * We have an exact match for the name, but there are no
4988                  * extant rdatasets.  That means that this node doesn't
4989                  * meaningfully exist, and that we really have a partial match.
4990                  */
4991                 NODE_UNLOCK(lock, locktype);
4992                 goto find_ns;
4993         }
4994
4995         /*
4996          * If we didn't find what we were looking for...
4997          */
4998         if (found == NULL ||
4999             (DNS_TRUST_ADDITIONAL(found->trust) &&
5000              ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
5001             (found->trust == dns_trust_glue &&
5002              ((options & DNS_DBFIND_GLUEOK) == 0)) ||
5003             (DNS_TRUST_PENDING(found->trust) &&
5004              ((options & DNS_DBFIND_PENDINGOK) == 0))) {
5005                 /*
5006                  * If there is an NS rdataset at this node, then this is the
5007                  * deepest zone cut.
5008                  */
5009                 if (nsheader != NULL) {
5010                         if (nodep != NULL) {
5011                                 new_reference(search.rbtdb, node);
5012                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
5013                                 *nodep = node;
5014                         }
5015                         bind_rdataset(search.rbtdb, node, nsheader, search.now,
5016                                       rdataset);
5017                         if (need_headerupdate(nsheader, search.now))
5018                                 update = nsheader;
5019                         if (nssig != NULL) {
5020                                 bind_rdataset(search.rbtdb, node, nssig,
5021                                               search.now, sigrdataset);
5022                                 if (need_headerupdate(nssig, search.now))
5023                                         updatesig = nssig;
5024                         }
5025                         result = DNS_R_DELEGATION;
5026                         goto node_exit;
5027                 }
5028
5029                 /*
5030                  * Go find the deepest zone cut.
5031                  */
5032                 NODE_UNLOCK(lock, locktype);
5033                 goto find_ns;
5034         }
5035
5036         /*
5037          * We found what we were looking for, or we found a CNAME.
5038          */
5039
5040         if (nodep != NULL) {
5041                 new_reference(search.rbtdb, node);
5042                 INSIST(!ISC_LINK_LINKED(node, deadlink));
5043                 *nodep = node;
5044         }
5045
5046         if (NEGATIVE(found)) {
5047                 /*
5048                  * We found a negative cache entry.
5049                  */
5050                 if (NXDOMAIN(found))
5051                         result = DNS_R_NCACHENXDOMAIN;
5052                 else
5053                         result = DNS_R_NCACHENXRRSET;
5054         } else if (type != found->type &&
5055                    type != dns_rdatatype_any &&
5056                    found->type == dns_rdatatype_cname) {
5057                 /*
5058                  * We weren't doing an ANY query and we found a CNAME instead
5059                  * of the type we were looking for, so we need to indicate
5060                  * that result to the caller.
5061                  */
5062                 result = DNS_R_CNAME;
5063         } else {
5064                 /*
5065                  * An ordinary successful query!
5066                  */
5067                 result = ISC_R_SUCCESS;
5068         }
5069
5070         if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
5071             result == DNS_R_NCACHENXRRSET) {
5072                 bind_rdataset(search.rbtdb, node, found, search.now,
5073                               rdataset);
5074                 if (need_headerupdate(found, search.now))
5075                         update = found;
5076                 if (!NEGATIVE(found) && foundsig != NULL) {
5077                         bind_rdataset(search.rbtdb, node, foundsig, search.now,
5078                                       sigrdataset);
5079                         if (need_headerupdate(foundsig, search.now))
5080                                 updatesig = foundsig;
5081                 }
5082         }
5083
5084  node_exit:
5085         if ((update != NULL || updatesig != NULL) &&
5086             locktype != isc_rwlocktype_write) {
5087                 NODE_UNLOCK(lock, locktype);
5088                 NODE_LOCK(lock, isc_rwlocktype_write);
5089                 locktype = isc_rwlocktype_write;
5090                 POST(locktype);
5091         }
5092         if (update != NULL && need_headerupdate(update, search.now))
5093                 update_header(search.rbtdb, update, search.now);
5094         if (updatesig != NULL && need_headerupdate(updatesig, search.now))
5095                 update_header(search.rbtdb, updatesig, search.now);
5096
5097         NODE_UNLOCK(lock, locktype);
5098
5099  tree_exit:
5100         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5101
5102         /*
5103          * If we found a zonecut but aren't going to use it, we have to
5104          * let go of it.
5105          */
5106         if (search.need_cleanup) {
5107                 node = search.zonecut;
5108                 INSIST(node != NULL);
5109                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
5110
5111                 NODE_LOCK(lock, isc_rwlocktype_read);
5112                 decrement_reference(search.rbtdb, node, 0,
5113                                     isc_rwlocktype_read, isc_rwlocktype_none,
5114                                     ISC_FALSE);
5115                 NODE_UNLOCK(lock, isc_rwlocktype_read);
5116         }
5117
5118         dns_rbtnodechain_reset(&search.chain);
5119
5120         return (result);
5121 }
5122
/*
 * Find the NS rdataset (and its RRSIG, if present) to use as the zone cut
 * for 'name' in this cache database.
 *
 * The tree is searched with dns_rbt_findnode().  On an exact match the
 * node's header list is scanned for an existing, unexpired NS rdataset;
 * headers whose TTL has passed are unlinked and freed, or marked stale,
 * along the way.  On a partial match, or when the matching node has no
 * such NS rdataset, the work is handed to find_deepest_zonecut().
 *
 * 'now' == 0 means "use the current time".  DNS_DBFIND_NOEXACT in
 * 'options' is translated to DNS_RBTFIND_NOEXACT for the tree search.
 * If 'nodep' is not NULL and NS data is found at the matching node, a new
 * reference to that node is stored in '*nodep'.
 *
 * The tree lock is held for reading for the whole lookup.  The node lock
 * is taken for reading and is held for writing only while stale headers
 * are purged or update_header() is called.
 *
 * Returns ISC_R_SUCCESS when a zone cut was bound (DNS_R_DELEGATION is
 * mapped to ISC_R_SUCCESS); any other result of dns_rbt_findnode() or
 * find_deepest_zonecut() is returned unchanged.
 */
5123 static isc_result_t
5124 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
5125                   isc_stdtime_t now, dns_dbnode_t **nodep,
5126                   dns_name_t *foundname,
5127                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
5128 {
5129         dns_rbtnode_t *node = NULL;
5130         nodelock_t *lock;
5131         isc_result_t result;
5132         rbtdb_search_t search;
5133         rdatasetheader_t *header, *header_prev, *header_next;
5134         rdatasetheader_t *found, *foundsig;
5135         unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
5136         isc_rwlocktype_t locktype;
5137
5138         search.rbtdb = (dns_rbtdb_t *)db;
5139
5140         REQUIRE(VALID_RBTDB(search.rbtdb));
5141
5142         if (now == 0)
5143                 isc_stdtime_get(&now);
5144
             /*
              * Set up the search context that is handed to
              * dns_rbt_findnode() and find_deepest_zonecut().
              */
5145         search.rbtversion = NULL;
5146         search.serial = 1;
5147         search.options = options;
5148         search.copy_name = ISC_FALSE;
5149         search.need_cleanup = ISC_FALSE;
5150         search.wild = ISC_FALSE;
5151         search.zonecut = NULL;
5152         dns_fixedname_init(&search.zonecut_name);
5153         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
5154         search.now = now;
5155
5156         if ((options & DNS_DBFIND_NOEXACT) != 0)
5157                 rbtoptions |= DNS_RBTFIND_NOEXACT;
5158
5159         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5160
5161         /*
5162          * Search down from the root of the tree.
5163          */
5164         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
5165                                   &search.chain, rbtoptions, NULL, &search);
5166
5167         if (result == DNS_R_PARTIALMATCH) {
                     /*
                      * This label is also reached by 'goto find_ns' further
                      * down, when the exactly matching node holds no usable
                      * NS rdataset.  The node lock has already been released
                      * on that path; only the tree lock is held here.
                      */
5168         find_ns:
5169                 result = find_deepest_zonecut(&search, node, nodep, foundname,
5170                                               rdataset, sigrdataset);
5171                 goto tree_exit;
5172         } else if (result != ISC_R_SUCCESS)
5173                 goto tree_exit;
5174
5175         /*
5176          * We now go looking for an NS rdataset at the node.
5177          */
5178
5179         lock = &(search.rbtdb->node_locks[node->locknum].lock);
5180         locktype = isc_rwlocktype_read;
5181         NODE_LOCK(lock, locktype);
5182
5183         found = NULL;
5184         foundsig = NULL;
5185         header_prev = NULL;
             /*
              * header_next is saved first because 'header' may be freed
              * inside the loop body; header_prev tracks the last header
              * still linked so that a freed one can be unlinked.
              */
5186         for (header = node->data; header != NULL; header = header_next) {
5187                 header_next = header->next;
5188                 if (header->rdh_ttl <  now) {
5189                         /*
5190                          * This rdataset is stale.  If no one else is using the
5191                          * node, we can clean it up right now, otherwise we
5192                          * mark it as stale, and the node as dirty, so it will
5193                          * get cleaned up later.
5194                          */
5195                         if ((header->rdh_ttl < now - RBTDB_VIRTUAL) &&
5196                             (locktype == isc_rwlocktype_write ||
5197                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5198                                 /*
5199                                  * We update the node's status only when we
5200                                  * can get write access.
5201                                  */
5202                                 locktype = isc_rwlocktype_write;
5203
5204                                 if (dns_rbtnode_refcurrent(node) == 0) {
5205                                         isc_mem_t *mctx;
5206
5207                                         mctx = search.rbtdb->common.mctx;
5208                                         clean_stale_headers(search.rbtdb, mctx,
5209                                                             header);
5210                                         if (header_prev != NULL)
5211                                                 header_prev->next =
5212                                                         header->next;
5213                                         else
5214                                                 node->data = header->next;
5215                                         free_rdataset(search.rbtdb, mctx,
5216                                                       header);
5217                                 } else {
5218                                         header->attributes |=
5219                                                 RDATASET_ATTR_STALE;
5220                                         node->dirty = 1;
5221                                         header_prev = header;
5222                                 }
5223                         } else
5224                                 header_prev = header;
5225                 } else if (EXISTS(header)) {
5226                         /*
5227                          * If we found a type we were looking for, remember
5228                          * it.
5229                          */
5230                         if (header->type == dns_rdatatype_ns) {
5231                                 /*
5232                                  * This is the NS rdataset this function
5233                                  * is looking for.  Keep scanning: its
5234                                  * RRSIG may appear later in the list.
5235                                  */
5236                                 found = header;
5237                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
5238                                 /*
5239                                  * If we need the NS rdataset, we'll also
5240                                  * need its signature.
5241                                  */
5242                                 foundsig = header;
5243                         }
5244                         header_prev = header;
5245                 } else
5246                         header_prev = header;
5247         }
5248
5249         if (found == NULL) {
5250                 /*
5251                  * No NS records here.
5252                  */
5253                 NODE_UNLOCK(lock, locktype);
5254                 goto find_ns;
5255         }
5256
5257         if (nodep != NULL) {
5258                 new_reference(search.rbtdb, node);
5259                 INSIST(!ISC_LINK_LINKED(node, deadlink));
5260                 *nodep = node;
5261         }
5262
5263         bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
5264         if (foundsig != NULL)
5265                 bind_rdataset(search.rbtdb, node, foundsig, search.now,
5266                               sigrdataset);
5267
5268         if (need_headerupdate(found, search.now) ||
5269             (foundsig != NULL &&  need_headerupdate(foundsig, search.now))) {
                     /*
                      * update_header() is only called with the node lock
                      * held for writing.  If we are still a reader, drop the
                      * lock and take it again as a writer, then re-check
                      * need_headerupdate() under the new lock before acting.
                      */
5270                 if (locktype != isc_rwlocktype_write) {
5271                         NODE_UNLOCK(lock, locktype);
5272                         NODE_LOCK(lock, isc_rwlocktype_write);
5273                         locktype = isc_rwlocktype_write;
5274                         POST(locktype);
5275                 }
5276                 if (need_headerupdate(found, search.now))
5277                         update_header(search.rbtdb, found, search.now);
5278                 if (foundsig != NULL &&
5279                     need_headerupdate(foundsig, search.now)) {
5280                         update_header(search.rbtdb, foundsig, search.now);
5281                 }
5282         }
5283
5284         NODE_UNLOCK(lock, locktype);
5285
5286  tree_exit:
5287         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5288
             /*
              * need_cleanup was initialised to ISC_FALSE above; this
              * function does not expect anything to have set it.
              */
5289         INSIST(!search.need_cleanup);
5290
5291         dns_rbtnodechain_reset(&search.chain);
5292
             /*
              * A DNS_R_DELEGATION result is reported to the caller as
              * plain success.
              */
5293         if (result == DNS_R_DELEGATION)
5294                 result = ISC_R_SUCCESS;
5295
5296         return (result);
5297 }
5298
5299 static void
5300 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
5301         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5302         dns_rbtnode_t *node = (dns_rbtnode_t *)source;
5303         unsigned int refs;
5304
5305         REQUIRE(VALID_RBTDB(rbtdb));
5306         REQUIRE(targetp != NULL && *targetp == NULL);
5307
5308         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
5309         dns_rbtnode_refincrement(node, &refs);
5310         INSIST(refs != 0);
5311         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
5312
5313         *targetp = source;
5314 }
5315
5316 static void
5317 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
5318         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5319         dns_rbtnode_t *node;
5320         isc_boolean_t want_free = ISC_FALSE;
5321         isc_boolean_t inactive = ISC_FALSE;
5322         rbtdb_nodelock_t *nodelock;
5323
5324         REQUIRE(VALID_RBTDB(rbtdb));
5325         REQUIRE(targetp != NULL && *targetp != NULL);
5326
5327         node = (dns_rbtnode_t *)(*targetp);
5328         nodelock = &rbtdb->node_locks[node->locknum];
5329
5330         NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
5331
5332         if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
5333                                 isc_rwlocktype_none, ISC_FALSE)) {
5334                 if (isc_refcount_current(&nodelock->references) == 0 &&
5335                     nodelock->exiting) {
5336                         inactive = ISC_TRUE;
5337                 }
5338         }
5339
5340         NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
5341
5342         *targetp = NULL;
5343
5344         if (inactive) {
5345                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5346                 rbtdb->active--;
5347                 if (rbtdb->active == 0)
5348                         want_free = ISC_TRUE;
5349                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
5350                 if (want_free) {
5351                         char buf[DNS_NAME_FORMATSIZE];
5352                         if (dns_name_dynamic(&rbtdb->common.origin))
5353                                 dns_name_format(&rbtdb->common.origin, buf,
5354                                                 sizeof(buf));
5355                         else
5356                                 strcpy(buf, "<UNKNOWN>");
5357                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
5358                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
5359                                       "calling free_rbtdb(%s)", buf);
5360                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
5361                 }
5362         }
5363 }
5364
5365 static isc_result_t
5366 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
5367         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5368         dns_rbtnode_t *rbtnode = node;
5369         rdatasetheader_t *header;
5370         isc_boolean_t force_expire = ISC_FALSE;
5371         /*
5372          * These are the category and module used by the cache cleaner.
5373          */
5374         isc_boolean_t log = ISC_FALSE;
5375         isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
5376         isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
5377         int level = ISC_LOG_DEBUG(2);
5378         char printname[DNS_NAME_FORMATSIZE];
5379
5380         REQUIRE(VALID_RBTDB(rbtdb));
5381
5382         /*
5383          * Caller must hold a tree lock.
5384          */
5385
5386         if (now == 0)
5387                 isc_stdtime_get(&now);
5388
5389         if (isc_mem_isovermem(rbtdb->common.mctx)) {
5390                 isc_uint32_t val;
5391
5392                 isc_random_get(&val);
5393                 /*
5394                  * XXXDCL Could stand to have a better policy, like LRU.
5395                  */
5396                 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
5397
5398                 /*
5399                  * Note that 'log' can be true IFF overmem is also true.
5400                  * overmem can currently only be true for cache
5401                  * databases -- hence all of the "overmem cache" log strings.
5402                  */
5403                 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
5404                 if (log)
5405                         isc_log_write(dns_lctx, category, module, level,
5406                                       "overmem cache: %s %s",
5407                                       force_expire ? "FORCE" : "check",
5408                                       dns_rbt_formatnodename(rbtnode,
5409                                                            printname,
5410                                                            sizeof(printname)));
5411         }
5412
5413         /*
5414          * We may not need write access, but this code path is not performance
5415          * sensitive, so it should be okay to always lock as a writer.
5416          */
5417         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5418                   isc_rwlocktype_write);
5419
5420         for (header = rbtnode->data; header != NULL; header = header->next)
5421                 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
5422                         /*
5423                          * We don't check if refcurrent(rbtnode) == 0 and try
5424                          * to free like we do in cache_find(), because
5425                          * refcurrent(rbtnode) must be non-zero.  This is so
5426                          * because 'node' is an argument to the function.
5427                          */
5428                         header->attributes |= RDATASET_ATTR_STALE;
5429                         rbtnode->dirty = 1;
5430                         if (log)
5431                                 isc_log_write(dns_lctx, category, module,
5432                                               level, "overmem cache: stale %s",
5433                                               printname);
5434                 } else if (force_expire) {
5435                         if (! RETAIN(header)) {
5436                                 set_ttl(rbtdb, header, 0);
5437                                 header->attributes |= RDATASET_ATTR_STALE;
5438                                 rbtnode->dirty = 1;
5439                         } else if (log) {
5440                                 isc_log_write(dns_lctx, category, module,
5441                                               level, "overmem cache: "
5442                                               "reprieve by RETAIN() %s",
5443                                               printname);
5444                         }
5445                 } else if (isc_mem_isovermem(rbtdb->common.mctx) && log)
5446                         isc_log_write(dns_lctx, category, module, level,
5447                                       "overmem cache: saved %s", printname);
5448
5449         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5450                     isc_rwlocktype_write);
5451
5452         return (ISC_R_SUCCESS);
5453 }
5454
5455 static void
5456 overmem(dns_db_t *db, isc_boolean_t overmem) {
5457         /* This is an empty callback.  See adb.c:water() */
5458
5459         UNUSED(db);
5460         UNUSED(overmem);
5461
5462         return;
5463 }
5464
5465 static void
5466 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5467         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5468         dns_rbtnode_t *rbtnode = node;
5469         isc_boolean_t first;
5470
5471         REQUIRE(VALID_RBTDB(rbtdb));
5472
5473         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5474                   isc_rwlocktype_read);
5475
5476         fprintf(out, "node %p, %u references, locknum = %u\n",
5477                 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5478                 rbtnode->locknum);
5479         if (rbtnode->data != NULL) {
5480                 rdatasetheader_t *current, *top_next;
5481
5482                 for (current = rbtnode->data; current != NULL;
5483                      current = top_next) {
5484                         top_next = current->next;
5485                         first = ISC_TRUE;
5486                         fprintf(out, "\ttype %u", current->type);
5487                         do {
5488                                 if (!first)
5489                                         fprintf(out, "\t");
5490                                 first = ISC_FALSE;
5491                                 fprintf(out,
5492                                         "\tserial = %lu, ttl = %u, "
5493                                         "trust = %u, attributes = %u, "
5494                                         "resign = %u\n",
5495                                         (unsigned long)current->serial,
5496                                         current->rdh_ttl,
5497                                         current->trust,
5498                                         current->attributes,
5499                                         current->resign);
5500                                 current = current->down;
5501                         } while (current != NULL);
5502                 }
5503         } else
5504                 fprintf(out, "(empty)\n");
5505
5506         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5507                     isc_rwlocktype_read);
5508 }
5509
5510 static isc_result_t
5511 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5512 {
5513         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5514         rbtdb_dbiterator_t *rbtdbiter;
5515
5516         REQUIRE(VALID_RBTDB(rbtdb));
5517
5518         rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5519         if (rbtdbiter == NULL)
5520                 return (ISC_R_NOMEMORY);
5521
5522         rbtdbiter->common.methods = &dbiterator_methods;
5523         rbtdbiter->common.db = NULL;
5524         dns_db_attach(db, &rbtdbiter->common.db);
5525         rbtdbiter->common.relative_names =
5526                         ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5527         rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5528         rbtdbiter->common.cleaning = ISC_FALSE;
5529         rbtdbiter->paused = ISC_TRUE;
5530         rbtdbiter->tree_locked = isc_rwlocktype_none;
5531         rbtdbiter->result = ISC_R_SUCCESS;
5532         dns_fixedname_init(&rbtdbiter->name);
5533         dns_fixedname_init(&rbtdbiter->origin);
5534         rbtdbiter->node = NULL;
5535         rbtdbiter->delete = 0;
5536         rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5537         rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5538         memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5539         dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5540         dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5541         if (rbtdbiter->nsec3only)
5542                 rbtdbiter->current = &rbtdbiter->nsec3chain;
5543         else
5544                 rbtdbiter->current = &rbtdbiter->chain;
5545
5546         *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5547
5548         return (ISC_R_SUCCESS);
5549 }
5550
5551 static isc_result_t
5552 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5553                   dns_rdatatype_t type, dns_rdatatype_t covers,
5554                   isc_stdtime_t now, dns_rdataset_t *rdataset,
5555                   dns_rdataset_t *sigrdataset)
5556 {
5557         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5558         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5559         rdatasetheader_t *header, *header_next, *found, *foundsig;
5560         rbtdb_serial_t serial;
5561         rbtdb_version_t *rbtversion = version;
5562         isc_boolean_t close_version = ISC_FALSE;
5563         rbtdb_rdatatype_t matchtype, sigmatchtype;
5564
5565         REQUIRE(VALID_RBTDB(rbtdb));
5566         REQUIRE(type != dns_rdatatype_any);
5567         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
5568
5569         if (rbtversion == NULL) {
5570                 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5571                 close_version = ISC_TRUE;
5572         }
5573         serial = rbtversion->serial;
5574         now = 0;
5575
5576         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5577                   isc_rwlocktype_read);
5578
5579         found = NULL;
5580         foundsig = NULL;
5581         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5582         if (covers == 0)
5583                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5584         else
5585                 sigmatchtype = 0;
5586
5587         for (header = rbtnode->data; header != NULL; header = header_next) {
5588                 header_next = header->next;
5589                 do {
5590                         if (header->serial <= serial &&
5591                             !IGNORE(header)) {
5592                                 /*
5593                                  * Is this a "this rdataset doesn't
5594                                  * exist" record?
5595                                  */
5596                                 if (NONEXISTENT(header))
5597                                         header = NULL;
5598                                 break;
5599                         } else
5600                                 header = header->down;
5601                 } while (header != NULL);
5602                 if (header != NULL) {
5603                         /*
5604                          * We have an active, extant rdataset.  If it's a
5605                          * type we're looking for, remember it.
5606                          */
5607                         if (header->type == matchtype) {
5608                                 found = header;
5609                                 if (foundsig != NULL)
5610                                         break;
5611                         } else if (header->type == sigmatchtype) {
5612                                 foundsig = header;
5613                                 if (found != NULL)
5614                                         break;
5615                         }
5616                 }
5617         }
5618         if (found != NULL) {
5619                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5620                 if (foundsig != NULL)
5621                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5622                                       sigrdataset);
5623         }
5624
5625         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5626                     isc_rwlocktype_read);
5627
5628         if (close_version)
5629                 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5630                              ISC_FALSE);
5631
5632         if (found == NULL)
5633                 return (ISC_R_NOTFOUND);
5634
5635         return (ISC_R_SUCCESS);
5636 }
5637
5638 static isc_result_t
5639 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5640                    dns_rdatatype_t type, dns_rdatatype_t covers,
5641                    isc_stdtime_t now, dns_rdataset_t *rdataset,
5642                    dns_rdataset_t *sigrdataset)
5643 {
5644         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5645         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5646         rdatasetheader_t *header, *header_next, *found, *foundsig;
5647         rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5648         isc_result_t result;
5649         nodelock_t *lock;
5650         isc_rwlocktype_t locktype;
5651
5652         REQUIRE(VALID_RBTDB(rbtdb));
5653         REQUIRE(type != dns_rdatatype_any);
5654
5655         UNUSED(version);
5656
5657         result = ISC_R_SUCCESS;
5658
5659         if (now == 0)
5660                 isc_stdtime_get(&now);
5661
5662         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5663         locktype = isc_rwlocktype_read;
5664         NODE_LOCK(lock, locktype);
5665
5666         found = NULL;
5667         foundsig = NULL;
5668         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5669         negtype = RBTDB_RDATATYPE_VALUE(0, type);
5670         if (covers == 0)
5671                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5672         else
5673                 sigmatchtype = 0;
5674
5675         for (header = rbtnode->data; header != NULL; header = header_next) {
5676                 header_next = header->next;
5677                 if (header->rdh_ttl < now) {
5678                         if ((header->rdh_ttl < now - RBTDB_VIRTUAL) &&
5679                             (locktype == isc_rwlocktype_write ||
5680                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5681                                 /*
5682                                  * We update the node's status only when we
5683                                  * can get write access.
5684                                  */
5685                                 locktype = isc_rwlocktype_write;
5686
5687                                 /*
5688                                  * We don't check if refcurrent(rbtnode) == 0
5689                                  * and try to free like we do in cache_find(),
5690                                  * because refcurrent(rbtnode) must be
5691                                  * non-zero.  This is so because 'node' is an
5692                                  * argument to the function.
5693                                  */
5694                                 header->attributes |= RDATASET_ATTR_STALE;
5695                                 rbtnode->dirty = 1;
5696                         }
5697                 } else if (EXISTS(header)) {
5698                         if (header->type == matchtype)
5699                                 found = header;
5700                         else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5701                                  header->type == negtype)
5702                                 found = header;
5703                         else if (header->type == sigmatchtype)
5704                                 foundsig = header;
5705                 }
5706         }
5707         if (found != NULL) {
5708                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5709                 if (!NEGATIVE(found) && foundsig != NULL)
5710                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5711                                       sigrdataset);
5712         }
5713
5714         NODE_UNLOCK(lock, locktype);
5715
5716         if (found == NULL)
5717                 return (ISC_R_NOTFOUND);
5718
5719         if (NEGATIVE(found)) {
5720                 /*
5721                  * We found a negative cache entry.
5722                  */
5723                 if (NXDOMAIN(found))
5724                         result = DNS_R_NCACHENXDOMAIN;
5725                 else
5726                         result = DNS_R_NCACHENXRRSET;
5727         }
5728
5729         return (result);
5730 }
5731
5732 static isc_result_t
5733 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5734              isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5735 {
5736         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5737         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5738         rbtdb_version_t *rbtversion = version;
5739         rbtdb_rdatasetiter_t *iterator;
5740         unsigned int refs;
5741
5742         REQUIRE(VALID_RBTDB(rbtdb));
5743
5744         iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5745         if (iterator == NULL)
5746                 return (ISC_R_NOMEMORY);
5747
5748         if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5749                 now = 0;
5750                 if (rbtversion == NULL)
5751                         currentversion(db,
5752                                  (dns_dbversion_t **) (void *)(&rbtversion));
5753                 else {
5754                         unsigned int refs;
5755
5756                         INSIST(rbtversion->rbtdb == rbtdb);
5757
5758                         isc_refcount_increment(&rbtversion->references,
5759                                                &refs);
5760                         INSIST(refs > 1);
5761                 }
5762         } else {
5763                 if (now == 0)
5764                         isc_stdtime_get(&now);
5765                 rbtversion = NULL;
5766         }
5767
5768         iterator->common.magic = DNS_RDATASETITER_MAGIC;
5769         iterator->common.methods = &rdatasetiter_methods;
5770         iterator->common.db = db;
5771         iterator->common.node = node;
5772         iterator->common.version = (dns_dbversion_t *)rbtversion;
5773         iterator->common.now = now;
5774
5775         NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5776
5777         dns_rbtnode_refincrement(rbtnode, &refs);
5778         INSIST(refs != 0);
5779
5780         iterator->current = NULL;
5781
5782         NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5783
5784         *iteratorp = (dns_rdatasetiter_t *)iterator;
5785
5786         return (ISC_R_SUCCESS);
5787 }
5788
5789 static isc_boolean_t
5790 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5791         rdatasetheader_t *header, *header_next;
5792         isc_boolean_t cname, other_data;
5793         dns_rdatatype_t rdtype;
5794
5795         /*
5796          * The caller must hold the node lock.
5797          */
5798
5799         /*
5800          * Look for CNAME and "other data" rdatasets active in our version.
5801          */
5802         cname = ISC_FALSE;
5803         other_data = ISC_FALSE;
5804         for (header = node->data; header != NULL; header = header_next) {
5805                 header_next = header->next;
5806                 if (header->type == dns_rdatatype_cname) {
5807                         /*
5808                          * Look for an active extant CNAME.
5809                          */
5810                         do {
5811                                 if (header->serial <= serial &&
5812                                     !IGNORE(header)) {
5813                                         /*
5814                                          * Is this a "this rdataset doesn't
5815                                          * exist" record?
5816                                          */
5817                                         if (NONEXISTENT(header))
5818                                                 header = NULL;
5819                                         break;
5820                                 } else
5821                                         header = header->down;
5822                         } while (header != NULL);
5823                         if (header != NULL)
5824                                 cname = ISC_TRUE;
5825                 } else {
5826                         /*
5827                          * Look for active extant "other data".
5828                          *
5829                          * "Other data" is any rdataset whose type is not
5830                          * KEY, NSEC, SIG or RRSIG.
5831                          */
5832                         rdtype = RBTDB_RDATATYPE_BASE(header->type);
5833                         if (rdtype != dns_rdatatype_key &&
5834                             rdtype != dns_rdatatype_sig &&
5835                             rdtype != dns_rdatatype_nsec &&
5836                             rdtype != dns_rdatatype_rrsig) {
5837                                 /*
5838                                  * Is it active and extant?
5839                                  */
5840                                 do {
5841                                         if (header->serial <= serial &&
5842                                             !IGNORE(header)) {
5843                                                 /*
5844                                                  * Is this a "this rdataset
5845                                                  * doesn't exist" record?
5846                                                  */
5847                                                 if (NONEXISTENT(header))
5848                                                         header = NULL;
5849                                                 break;
5850                                         } else
5851                                                 header = header->down;
5852                                 } while (header != NULL);
5853                                 if (header != NULL)
5854                                         other_data = ISC_TRUE;
5855                         }
5856                 }
5857         }
5858
5859         if (cname && other_data)
5860                 return (ISC_TRUE);
5861
5862         return (ISC_FALSE);
5863 }
5864
5865 static isc_result_t
5866 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5867         isc_result_t result;
5868
5869         INSIST(!IS_CACHE(rbtdb));
5870         INSIST(newheader->heap_index == 0);
5871         INSIST(!ISC_LINK_LINKED(newheader, link));
5872
5873         result = isc_heap_insert(rbtdb->heaps[idx], newheader);
5874         return (result);
5875 }
5876
5877 static isc_result_t
5878 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5879     rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5880     dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5881 {
5882         rbtdb_changed_t *changed = NULL;
5883         rdatasetheader_t *topheader, *topheader_prev, *header, *sigheader;
5884         unsigned char *merged;
5885         isc_result_t result;
5886         isc_boolean_t header_nx;
5887         isc_boolean_t newheader_nx;
5888         isc_boolean_t merge;
5889         dns_rdatatype_t rdtype, covers;
5890         rbtdb_rdatatype_t negtype, sigtype;
5891         dns_trust_t trust;
5892         int idx;
5893
5894         /*
5895          * Add an rdatasetheader_t to a node.
5896          */
5897
5898         /*
5899          * Caller must be holding the node lock.
5900          */
5901
5902         if ((options & DNS_DBADD_MERGE) != 0) {
5903                 REQUIRE(rbtversion != NULL);
5904                 merge = ISC_TRUE;
5905         } else
5906                 merge = ISC_FALSE;
5907
5908         if ((options & DNS_DBADD_FORCE) != 0)
5909                 trust = dns_trust_ultimate;
5910         else
5911                 trust = newheader->trust;
5912
5913         if (rbtversion != NULL && !loading) {
5914                 /*
5915                  * We always add a changed record, even if no changes end up
5916                  * being made to this node, because it's harmless and
5917                  * simplifies the code.
5918                  */
5919                 changed = add_changed(rbtdb, rbtversion, rbtnode);
5920                 if (changed == NULL) {
5921                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5922                         return (ISC_R_NOMEMORY);
5923                 }
5924         }
5925
5926         newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5927         topheader_prev = NULL;
5928         sigheader = NULL;
5929         negtype = 0;
5930         if (rbtversion == NULL && !newheader_nx) {
5931                 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5932                 covers = RBTDB_RDATATYPE_EXT(newheader->type);
5933                 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, covers);
5934                 if (NEGATIVE(newheader)) {
5935                         /*
5936                          * We're adding a negative cache entry.
5937                          */
5938                         for (topheader = rbtnode->data;
5939                              topheader != NULL;
5940                              topheader = topheader->next) {
5941                                 /*
5942                                  * If we're adding a negative cache entry
5943                                  * which covers all types (NXDOMAIN,
5944                                  * NODATA(QTYPE=ANY)).
5945                                  *
5946                                  * We make all other data stale so that the
5947                                  * only rdataset that can be found at this
5948                                  * node is the negative cache entry.
5949                                  *
5950                                  * Otherwise look for any RRSIGs of the
5951                                  * given type so they can be marked stale
5952                                  * later.
5953                                  */
5954                                 if (covers == dns_rdatatype_any) {
5955                                         set_ttl(rbtdb, topheader, 0);
5956                                         topheader->attributes |=
5957                                                 RDATASET_ATTR_STALE;
5958                                         rbtnode->dirty = 1;
5959                                 } else if (topheader->type == sigtype)
5960                                         sigheader = topheader;
5961                         }
5962                         if (covers == dns_rdatatype_any)
5963                                 goto find_header;
5964                         negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5965                 } else {
5966                         /*
5967                          * We're adding something that isn't a
5968                          * negative cache entry.  Look for an extant
5969                          * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5970                          * cache entry.  If we're adding an RRSIG, also
5971                          * check for an extant non-stale NODATA ncache
5972                          * entry which covers the same type as the RRSIG.
5973                          */
5974                         for (topheader = rbtnode->data;
5975                              topheader != NULL;
5976                              topheader = topheader->next) {
5977                                 if ((topheader->type ==
5978                                         RBTDB_RDATATYPE_NCACHEANY) ||
5979                                         (newheader->type == sigtype &&
5980                                         topheader->type ==
5981                                         RBTDB_RDATATYPE_VALUE(0, covers))) {
5982                                                 break;
5983                                         }
5984                         }
5985                         if (topheader != NULL && EXISTS(topheader) &&
5986                             topheader->rdh_ttl >= now) {
5987                                 /*
5988                                  * Found one.
5989                                  */
5990                                 if (trust < topheader->trust) {
5991                                         /*
5992                                          * The NXDOMAIN/NODATA(QTYPE=ANY)
5993                                          * is more trusted.
5994                                          */
5995                                         free_rdataset(rbtdb,
5996                                                       rbtdb->common.mctx,
5997                                                       newheader);
5998                                         if (addedrdataset != NULL)
5999                                                 bind_rdataset(rbtdb, rbtnode,
6000                                                               topheader, now,
6001                                                               addedrdataset);
6002                                         return (DNS_R_UNCHANGED);
6003                                 }
6004                                 /*
6005                                  * The new rdataset is better.  Expire the
6006                                  * ncache entry.
6007                                  */
6008                                 set_ttl(rbtdb, topheader, 0);
6009                                 topheader->attributes |= RDATASET_ATTR_STALE;
6010                                 rbtnode->dirty = 1;
6011                                 topheader = NULL;
6012                                 goto find_header;
6013                         }
6014                         negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
6015                 }
6016         }
6017
6018         for (topheader = rbtnode->data;
6019              topheader != NULL;
6020              topheader = topheader->next) {
6021                 if (topheader->type == newheader->type ||
6022                     topheader->type == negtype)
6023                         break;
6024                 topheader_prev = topheader;
6025         }
6026
6027  find_header:
6028         /*
6029          * If header isn't NULL, we've found the right type.  There may be
6030          * IGNORE rdatasets between the top of the chain and the first real
6031          * data.  We skip over them.
6032          */
6033         header = topheader;
6034         while (header != NULL && IGNORE(header))
6035                 header = header->down;
6036         if (header != NULL) {
6037                 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
6038
6039                 /*
6040                  * Deleting an already non-existent rdataset has no effect.
6041                  */
6042                 if (header_nx && newheader_nx) {
6043                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6044                         return (DNS_R_UNCHANGED);
6045                 }
6046
6047                 /*
6048                  * Trying to add an rdataset with lower trust to a cache DB
6049                  * has no effect, provided that the cache data isn't stale.
6050                  */
6051                 if (rbtversion == NULL && trust < header->trust &&
6052                     (header->rdh_ttl >= now || header_nx)) {
6053                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6054                         if (addedrdataset != NULL)
6055                                 bind_rdataset(rbtdb, rbtnode, header, now,
6056                                               addedrdataset);
6057                         return (DNS_R_UNCHANGED);
6058                 }
6059
6060                 /*
6061                  * Don't merge if a nonexistent rdataset is involved.
6062                  */
6063                 if (merge && (header_nx || newheader_nx))
6064                         merge = ISC_FALSE;
6065
6066                 /*
6067                  * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
6068                  * that is the union of 'newheader' and 'header'.
6069                  */
6070                 if (merge) {
6071                         unsigned int flags = 0;
6072                         INSIST(rbtversion->serial >= header->serial);
6073                         merged = NULL;
6074                         result = ISC_R_SUCCESS;
6075
6076                         if ((options & DNS_DBADD_EXACT) != 0)
6077                                 flags |= DNS_RDATASLAB_EXACT;
6078                         if ((options & DNS_DBADD_EXACTTTL) != 0 &&
6079                              newheader->rdh_ttl != header->rdh_ttl)
6080                                         result = DNS_R_NOTEXACT;
6081                         else if (newheader->rdh_ttl != header->rdh_ttl)
6082                                 flags |= DNS_RDATASLAB_FORCE;
6083                         if (result == ISC_R_SUCCESS)
6084                                 result = dns_rdataslab_merge(
6085                                              (unsigned char *)header,
6086                                              (unsigned char *)newheader,
6087                                              (unsigned int)(sizeof(*newheader)),
6088                                              rbtdb->common.mctx,
6089                                              rbtdb->common.rdclass,
6090                                              (dns_rdatatype_t)header->type,
6091                                              flags, &merged);
6092                         if (result == ISC_R_SUCCESS) {
6093                                 /*
6094                                  * If 'header' has the same serial number as
6095                                  * we do, we could clean it up now if we knew
6096                                  * that our caller had no references to it.
6097                                  * We don't know this, however, so we leave it
6098                                  * alone.  It will get cleaned up when
6099                                  * clean_zone_node() runs.
6100                                  */
6101                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6102                                               newheader);
6103                                 newheader = (rdatasetheader_t *)merged;
6104                                 init_rdataset(rbtdb, newheader);
6105                                 if (loading && RESIGN(newheader) &&
6106                                     RESIGN(header) &&
6107                                     header->resign < newheader->resign)
6108                                         newheader->resign = header->resign;
6109                         } else {
6110                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6111                                               newheader);
6112                                 return (result);
6113                         }
6114                 }
6115                 /*
6116                  * Don't replace existing NS, A and AAAA RRsets
6117                  * in the cache if they already exist.  This
6118                  * prevents named being locked to old servers.
6119                  * Don't lower trust of existing record if the
6120                  * update is forced.
6121                  */
6122                 if (IS_CACHE(rbtdb) && header->rdh_ttl >= now &&
6123                     header->type == dns_rdatatype_ns &&
6124                     !header_nx && !newheader_nx &&
6125                     header->trust >= newheader->trust &&
6126                     dns_rdataslab_equalx((unsigned char *)header,
6127                                          (unsigned char *)newheader,
6128                                          (unsigned int)(sizeof(*newheader)),
6129                                          rbtdb->common.rdclass,
6130                                          (dns_rdatatype_t)header->type)) {
6131                         /*
6132                          * Honour the new ttl if it is less than the
6133                          * older one.
6134                          */
6135                         if (header->rdh_ttl > newheader->rdh_ttl)
6136                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
6137                         if (header->noqname == NULL &&
6138                             newheader->noqname != NULL) {
6139                                 header->noqname = newheader->noqname;
6140                                 newheader->noqname = NULL;
6141                         }
6142                         if (header->closest == NULL &&
6143                             newheader->closest != NULL) {
6144                                 header->closest = newheader->closest;
6145                                 newheader->closest = NULL;
6146                         }
6147                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6148                         if (addedrdataset != NULL)
6149                                 bind_rdataset(rbtdb, rbtnode, header, now,
6150                                               addedrdataset);
6151                         return (ISC_R_SUCCESS);
6152                 }
6153                 /*
6154                  * If we will be replacing an NS RRset, force its TTL
6155                  * to be no more than the current NS RRset's TTL.  This
6156                  * ensures the delegations that are withdrawn are honoured.
6157                  */
6158                 if (IS_CACHE(rbtdb) && header->rdh_ttl >= now &&
6159                     header->type == dns_rdatatype_ns &&
6160                     !header_nx && !newheader_nx &&
6161                     header->trust <= newheader->trust) {
6162                         if (newheader->rdh_ttl > header->rdh_ttl) {
6163                                 newheader->rdh_ttl = header->rdh_ttl;
6164                         }
6165                 }
6166                 if (IS_CACHE(rbtdb) && header->rdh_ttl >= now &&
6167                     (header->type == dns_rdatatype_a ||
6168                      header->type == dns_rdatatype_aaaa ||
6169                      header->type == dns_rdatatype_ds ||
6170                      header->type == RBTDB_RDATATYPE_SIGDDS) &&
6171                     !header_nx && !newheader_nx &&
6172                     header->trust >= newheader->trust &&
6173                     dns_rdataslab_equal((unsigned char *)header,
6174                                         (unsigned char *)newheader,
6175                                         (unsigned int)(sizeof(*newheader)))) {
6176                         /*
6177                          * Honour the new ttl if it is less than the
6178                          * older one.
6179                          */
6180                         if (header->rdh_ttl > newheader->rdh_ttl)
6181                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
6182                         if (header->noqname == NULL &&
6183                             newheader->noqname != NULL) {
6184                                 header->noqname = newheader->noqname;
6185                                 newheader->noqname = NULL;
6186                         }
6187                         if (header->closest == NULL &&
6188                             newheader->closest != NULL) {
6189                                 header->closest = newheader->closest;
6190                                 newheader->closest = NULL;
6191                         }
6192                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6193                         if (addedrdataset != NULL)
6194                                 bind_rdataset(rbtdb, rbtnode, header, now,
6195                                               addedrdataset);
6196                         return (ISC_R_SUCCESS);
6197                 }
6198                 INSIST(rbtversion == NULL ||
6199                        rbtversion->serial >= topheader->serial);
6200                 if (topheader_prev != NULL)
6201                         topheader_prev->next = newheader;
6202                 else
6203                         rbtnode->data = newheader;
6204                 newheader->next = topheader->next;
6205                 if (loading) {
6206                         /*
6207                          * There are no other references to 'header' when
6208                          * loading, so we MAY clean up 'header' now.
6209                          * Since we don't generate changed records when
6210                          * loading, we MUST clean up 'header' now.
6211                          */
6212                         newheader->down = NULL;
6213                         free_rdataset(rbtdb, rbtdb->common.mctx, header);
6214                 } else {
6215                         newheader->down = topheader;
6216                         topheader->next = newheader;
6217                         rbtnode->dirty = 1;
6218                         if (changed != NULL)
6219                                 changed->dirty = ISC_TRUE;
6220                         if (rbtversion == NULL) {
6221                                 set_ttl(rbtdb, header, 0);
6222                                 header->attributes |= RDATASET_ATTR_STALE;
6223                                 if (sigheader != NULL) {
6224                                         set_ttl(rbtdb, sigheader, 0);
6225                                         sigheader->attributes |=
6226                                                  RDATASET_ATTR_STALE;
6227                                 }
6228                         }
6229                         idx = newheader->node->locknum;
6230                         if (IS_CACHE(rbtdb)) {
6231                                 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
6232                                                  newheader, link);
6233                                 /*
6234                                  * XXXMLG We don't check the return value
6235                                  * here.  If it fails, we will not do TTL
6236                                  * based expiry on this node.  However, we
6237                                  * will do it on the LRU side, so memory
6238                                  * will not leak... for long.
6239                                  */
6240                                 INSIST(rbtdb->heaps != NULL);
6241                                 isc_heap_insert(rbtdb->heaps[idx], newheader);
6242                         } else if (RESIGN(newheader))
6243                                 resign_insert(rbtdb, idx, newheader);
6244                 }
6245         } else {
6246                 /*
6247                  * No non-IGNORED rdatasets of the given type exist at
6248                  * this node.
6249                  */
6250
6251                 /*
6252                  * If we're trying to delete the type, don't bother.
6253                  */
6254                 if (newheader_nx) {
6255                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6256                         return (DNS_R_UNCHANGED);
6257                 }
6258
6259                 if (topheader != NULL) {
6260                         /*
6261                          * We have a list of rdatasets of the given type,
6262                          * but they're all marked IGNORE.  We simply insert
6263                          * the new rdataset at the head of the list.
6264                          *
6265                          * Ignored rdatasets cannot occur during loading, so
6266                          * we INSIST on it.
6267                          */
6268                         INSIST(!loading);
6269                         INSIST(rbtversion == NULL ||
6270                                rbtversion->serial >= topheader->serial);
6271                         if (topheader_prev != NULL)
6272                                 topheader_prev->next = newheader;
6273                         else
6274                                 rbtnode->data = newheader;
6275                         newheader->next = topheader->next;
6276                         newheader->down = topheader;
6277                         topheader->next = newheader;
6278                         rbtnode->dirty = 1;
6279                         if (changed != NULL)
6280                                 changed->dirty = ISC_TRUE;
6281                 } else {
6282                         /*
6283                          * No rdatasets of the given type exist at the node.
6284                          */
6285                         newheader->next = rbtnode->data;
6286                         newheader->down = NULL;
6287                         rbtnode->data = newheader;
6288                 }
6289                 idx = newheader->node->locknum;
6290                 if (IS_CACHE(rbtdb)) {
6291                         ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
6292                                          newheader, link);
6293                         isc_heap_insert(rbtdb->heaps[idx], newheader);
6294                 } else if (RESIGN(newheader)) {
6295                         resign_insert(rbtdb, idx, newheader);
6296                 }
6297         }
6298
6299         /*
6300          * Check if the node now contains CNAME and other data.
6301          */
6302         if (rbtversion != NULL &&
6303             cname_and_other_data(rbtnode, rbtversion->serial))
6304                 return (DNS_R_CNAMEANDOTHER);
6305
6306         if (addedrdataset != NULL)
6307                 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
6308
6309         return (ISC_R_SUCCESS);
6310 }
6311
6312 static inline isc_boolean_t
6313 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
6314                 rbtdb_rdatatype_t type)
6315 {
6316         if (IS_CACHE(rbtdb)) {
6317                 if (type == dns_rdatatype_dname)
6318                         return (ISC_TRUE);
6319                 else
6320                         return (ISC_FALSE);
6321         } else if (type == dns_rdatatype_dname ||
6322                    (type == dns_rdatatype_ns &&
6323                     (node != rbtdb->origin_node || IS_STUB(rbtdb))))
6324                 return (ISC_TRUE);
6325         return (ISC_FALSE);
6326 }
6327
6328 static inline isc_result_t
6329 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6330            dns_rdataset_t *rdataset)
6331 {
6332         struct noqname *noqname;
6333         isc_mem_t *mctx = rbtdb->common.mctx;
6334         dns_name_t name;
6335         dns_rdataset_t neg, negsig;
6336         isc_result_t result;
6337         isc_region_t r;
6338
6339         dns_name_init(&name, NULL);
6340         dns_rdataset_init(&neg);
6341         dns_rdataset_init(&negsig);
6342
6343         result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
6344         RUNTIME_CHECK(result == ISC_R_SUCCESS);
6345
6346         noqname = isc_mem_get(mctx, sizeof(*noqname));
6347         if (noqname == NULL) {
6348                 result = ISC_R_NOMEMORY;
6349                 goto cleanup;
6350         }
6351         dns_name_init(&noqname->name, NULL);
6352         noqname->neg = NULL;
6353         noqname->negsig = NULL;
6354         noqname->type = neg.type;
6355         result = dns_name_dup(&name, mctx, &noqname->name);
6356         if (result != ISC_R_SUCCESS)
6357                 goto cleanup;
6358         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6359         if (result != ISC_R_SUCCESS)
6360                 goto cleanup;
6361         noqname->neg = r.base;
6362         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6363         if (result != ISC_R_SUCCESS)
6364                 goto cleanup;
6365         noqname->negsig = r.base;
6366         dns_rdataset_disassociate(&neg);
6367         dns_rdataset_disassociate(&negsig);
6368         newheader->noqname = noqname;
6369         return (ISC_R_SUCCESS);
6370
6371 cleanup:
6372         dns_rdataset_disassociate(&neg);
6373         dns_rdataset_disassociate(&negsig);
6374         if (noqname != NULL)
6375                 free_noqname(mctx, &noqname);
6376         return(result);
6377 }
6378
6379 static inline isc_result_t
6380 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6381            dns_rdataset_t *rdataset)
6382 {
6383         struct noqname *closest;
6384         isc_mem_t *mctx = rbtdb->common.mctx;
6385         dns_name_t name;
6386         dns_rdataset_t neg, negsig;
6387         isc_result_t result;
6388         isc_region_t r;
6389
6390         dns_name_init(&name, NULL);
6391         dns_rdataset_init(&neg);
6392         dns_rdataset_init(&negsig);
6393
6394         result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
6395         RUNTIME_CHECK(result == ISC_R_SUCCESS);
6396
6397         closest = isc_mem_get(mctx, sizeof(*closest));
6398         if (closest == NULL) {
6399                 result = ISC_R_NOMEMORY;
6400                 goto cleanup;
6401         }
6402         dns_name_init(&closest->name, NULL);
6403         closest->neg = NULL;
6404         closest->negsig = NULL;
6405         closest->type = neg.type;
6406         result = dns_name_dup(&name, mctx, &closest->name);
6407         if (result != ISC_R_SUCCESS)
6408                 goto cleanup;
6409         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6410         if (result != ISC_R_SUCCESS)
6411                 goto cleanup;
6412         closest->neg = r.base;
6413         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6414         if (result != ISC_R_SUCCESS)
6415                 goto cleanup;
6416         closest->negsig = r.base;
6417         dns_rdataset_disassociate(&neg);
6418         dns_rdataset_disassociate(&negsig);
6419         newheader->closest = closest;
6420         return (ISC_R_SUCCESS);
6421
6422  cleanup:
6423         dns_rdataset_disassociate(&neg);
6424         dns_rdataset_disassociate(&negsig);
6425         if (closest != NULL)
6426                 free_noqname(mctx, &closest);
6427         return(result);
6428 }
6429
/*
 * Forward declaration of the zone database method table.  addrdataset()
 * and subtractrdataset() below compare rbtdb->common.methods against its
 * address to decide whether their NSEC3 node/type consistency REQUIRE
 * applies.
 * NOTE(review): the initialized definition is not in this part of the
 * file; presumably it appears further down -- confirm.
 */
6430 static dns_dbmethods_t zone_methods;
6431
6432 static isc_result_t
6433 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6434             isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
6435             dns_rdataset_t *addedrdataset)
6436 {
6437         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6438         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6439         rbtdb_version_t *rbtversion = version;
6440         isc_region_t region;
6441         rdatasetheader_t *newheader;
6442         rdatasetheader_t *header;
6443         isc_result_t result;
6444         isc_boolean_t delegating;
6445         isc_boolean_t newnsec;
6446         isc_boolean_t tree_locked = ISC_FALSE;
6447         isc_boolean_t cache_is_overmem = ISC_FALSE;
6448
6449         REQUIRE(VALID_RBTDB(rbtdb));
6450         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
6451
6452         if (rbtdb->common.methods == &zone_methods)
6453                 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6454                           (rdataset->type == dns_rdatatype_nsec3 ||
6455                            rdataset->covers == dns_rdatatype_nsec3)) ||
6456                          (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6457                            rdataset->type != dns_rdatatype_nsec3 &&
6458                            rdataset->covers != dns_rdatatype_nsec3)));
6459
6460         if (rbtversion == NULL) {
6461                 if (now == 0)
6462                         isc_stdtime_get(&now);
6463         } else
6464                 now = 0;
6465
6466         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6467                                             &region, sizeof(rdatasetheader_t));
6468         if (result != ISC_R_SUCCESS)
6469                 return (result);
6470
6471         newheader = (rdatasetheader_t *)region.base;
6472         init_rdataset(rbtdb, newheader);
6473         set_ttl(rbtdb, newheader, rdataset->ttl + now);
6474         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6475                                                 rdataset->covers);
6476         newheader->attributes = 0;
6477         newheader->noqname = NULL;
6478         newheader->closest = NULL;
6479         newheader->count = init_count++;
6480         newheader->trust = rdataset->trust;
6481         newheader->additional_auth = NULL;
6482         newheader->additional_glue = NULL;
6483         newheader->last_used = now;
6484         newheader->node = rbtnode;
6485         if (rbtversion != NULL) {
6486                 newheader->serial = rbtversion->serial;
6487                 now = 0;
6488
6489                 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6490                         newheader->attributes |= RDATASET_ATTR_RESIGN;
6491                         newheader->resign = rdataset->resign;
6492                 } else
6493                         newheader->resign = 0;
6494         } else {
6495                 newheader->serial = 1;
6496                 newheader->resign = 0;
6497                 if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0)
6498                         newheader->attributes |= RDATASET_ATTR_NEGATIVE;
6499                 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
6500                         newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6501                 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6502                         newheader->attributes |= RDATASET_ATTR_OPTOUT;
6503                 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6504                         result = addnoqname(rbtdb, newheader, rdataset);
6505                         if (result != ISC_R_SUCCESS) {
6506                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6507                                               newheader);
6508                                 return (result);
6509                         }
6510                 }
6511                 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6512                         result = addclosest(rbtdb, newheader, rdataset);
6513                         if (result != ISC_R_SUCCESS) {
6514                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6515                                               newheader);
6516                                 return (result);
6517                         }
6518                 }
6519         }
6520
6521         /*
6522          * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6523          * just DNAME for the cache), then we need to set the callback bit
6524          * on the node.
6525          */
6526         if (delegating_type(rbtdb, rbtnode, rdataset->type))
6527                 delegating = ISC_TRUE;
6528         else
6529                 delegating = ISC_FALSE;
6530
6531         /*
6532          * Add to the auxiliary NSEC tree if we're adding an NSEC record.
6533          */
6534         if (rbtnode->nsec != DNS_RBT_NSEC_HAS_NSEC &&
6535             rdataset->type == dns_rdatatype_nsec)
6536                 newnsec = ISC_TRUE;
6537         else
6538                 newnsec = ISC_FALSE;
6539
6540         /*
6541          * If we're adding a delegation type, adding to the auxiliary NSEC tree,
6542          * or the DB is a cache in an overmem state, hold an exclusive lock on
6543          * the tree.  In the latter case the lock does not necessarily have to
6544          * be acquired but it will help purge stale entries more effectively.
6545          */
6546         if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx))
6547                 cache_is_overmem = ISC_TRUE;
6548         if (delegating || newnsec || cache_is_overmem) {
6549                 tree_locked = ISC_TRUE;
6550                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6551         }
6552
6553         if (cache_is_overmem)
6554                 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6555
6556         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6557                   isc_rwlocktype_write);
6558
6559         if (rbtdb->rrsetstats != NULL) {
6560                 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6561                 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6562         }
6563
6564         if (IS_CACHE(rbtdb)) {
6565                 if (tree_locked)
6566                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6567
6568                 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6569                 if (header && header->rdh_ttl < now - RBTDB_VIRTUAL)
6570                         expire_header(rbtdb, header, tree_locked);
6571
6572                 /*
6573                  * If we've been holding a write lock on the tree just for
6574                  * cleaning, we can release it now.  However, we still need the
6575                  * node lock.
6576                  */
6577                 if (tree_locked && !delegating && !newnsec) {
6578                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6579                         tree_locked = ISC_FALSE;
6580                 }
6581         }
6582
6583         result = ISC_R_SUCCESS;
6584         if (newnsec) {
6585                 dns_fixedname_t fname;
6586                 dns_name_t *name;
6587                 dns_rbtnode_t *nsecnode;
6588
6589                 dns_fixedname_init(&fname);
6590                 name = dns_fixedname_name(&fname);
6591                 dns_rbt_fullnamefromnode(rbtnode, name);
6592                 nsecnode = NULL;
6593                 result = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
6594                 if (result == ISC_R_SUCCESS) {
6595                         nsecnode->nsec = DNS_RBT_NSEC_NSEC;
6596                         rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6597                 } else if (result == ISC_R_EXISTS) {
6598                         rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6599                         result = ISC_R_SUCCESS;
6600                 }
6601         }
6602
6603         if (result == ISC_R_SUCCESS)
6604                 result = add(rbtdb, rbtnode, rbtversion, newheader, options,
6605                              ISC_FALSE, addedrdataset, now);
6606         if (result == ISC_R_SUCCESS && delegating)
6607                 rbtnode->find_callback = 1;
6608
6609         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6610                     isc_rwlocktype_write);
6611
6612         if (tree_locked)
6613                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6614
6615         /*
6616          * Update the zone's secure status.  If version is non-NULL
6617          * this is deferred until closeversion() is called.
6618          */
6619         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6620                 iszonesecure(db, version, rbtdb->origin_node);
6621
6622         return (result);
6623 }
6624
6625 static isc_result_t
6626 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6627                  dns_rdataset_t *rdataset, unsigned int options,
6628                  dns_rdataset_t *newrdataset)
6629 {
6630         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6631         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6632         rbtdb_version_t *rbtversion = version;
6633         rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6634         unsigned char *subresult;
6635         isc_region_t region;
6636         isc_result_t result;
6637         rbtdb_changed_t *changed;
6638
6639         REQUIRE(VALID_RBTDB(rbtdb));
6640         REQUIRE(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
6641
6642         if (rbtdb->common.methods == &zone_methods)
6643                 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6644                           (rdataset->type == dns_rdatatype_nsec3 ||
6645                            rdataset->covers == dns_rdatatype_nsec3)) ||
6646                          (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6647                            rdataset->type != dns_rdatatype_nsec3 &&
6648                            rdataset->covers != dns_rdatatype_nsec3)));
6649
6650         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6651                                             &region,
6652                                             sizeof(rdatasetheader_t));
6653         if (result != ISC_R_SUCCESS)
6654                 return (result);
6655         newheader = (rdatasetheader_t *)region.base;
6656         init_rdataset(rbtdb, newheader);
6657         set_ttl(rbtdb, newheader, rdataset->ttl);
6658         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6659                                                 rdataset->covers);
6660         newheader->attributes = 0;
6661         newheader->serial = rbtversion->serial;
6662         newheader->trust = 0;
6663         newheader->noqname = NULL;
6664         newheader->closest = NULL;
6665         newheader->count = init_count++;
6666         newheader->additional_auth = NULL;
6667         newheader->additional_glue = NULL;
6668         newheader->last_used = 0;
6669         newheader->node = rbtnode;
6670         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6671                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6672                 newheader->resign = rdataset->resign;
6673         } else
6674                 newheader->resign = 0;
6675
6676         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6677                   isc_rwlocktype_write);
6678
6679         changed = add_changed(rbtdb, rbtversion, rbtnode);
6680         if (changed == NULL) {
6681                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6682                 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6683                             isc_rwlocktype_write);
6684                 return (ISC_R_NOMEMORY);
6685         }
6686
6687         topheader_prev = NULL;
6688         for (topheader = rbtnode->data;
6689              topheader != NULL;
6690              topheader = topheader->next) {
6691                 if (topheader->type == newheader->type)
6692                         break;
6693                 topheader_prev = topheader;
6694         }
6695         /*
6696          * If header isn't NULL, we've found the right type.  There may be
6697          * IGNORE rdatasets between the top of the chain and the first real
6698          * data.  We skip over them.
6699          */
6700         header = topheader;
6701         while (header != NULL && IGNORE(header))
6702                 header = header->down;
6703         if (header != NULL && EXISTS(header)) {
6704                 unsigned int flags = 0;
6705                 subresult = NULL;
6706                 result = ISC_R_SUCCESS;
6707                 if ((options & DNS_DBSUB_EXACT) != 0) {
6708                         flags |= DNS_RDATASLAB_EXACT;
6709                         if (newheader->rdh_ttl != header->rdh_ttl)
6710                                 result = DNS_R_NOTEXACT;
6711                 }
6712                 if (result == ISC_R_SUCCESS)
6713                         result = dns_rdataslab_subtract(
6714                                         (unsigned char *)header,
6715                                         (unsigned char *)newheader,
6716                                         (unsigned int)(sizeof(*newheader)),
6717                                         rbtdb->common.mctx,
6718                                         rbtdb->common.rdclass,
6719                                         (dns_rdatatype_t)header->type,
6720                                         flags, &subresult);
6721                 if (result == ISC_R_SUCCESS) {
6722                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6723                         newheader = (rdatasetheader_t *)subresult;
6724                         init_rdataset(rbtdb, newheader);
6725                         /*
6726                          * We have to set the serial since the rdataslab
6727                          * subtraction routine copies the reserved portion of
6728                          * header, not newheader.
6729                          */
6730                         newheader->serial = rbtversion->serial;
6731                         /*
6732                          * XXXJT: dns_rdataslab_subtract() copied the pointers
6733                          * to additional info.  We need to clear these fields
6734                          * to avoid having duplicated references.
6735                          */
6736                         newheader->additional_auth = NULL;
6737                         newheader->additional_glue = NULL;
6738                 } else if (result == DNS_R_NXRRSET) {
6739                         /*
6740                          * This subtraction would remove all of the rdata;
6741                          * add a nonexistent header instead.
6742                          */
6743                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6744                         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6745                         if (newheader == NULL) {
6746                                 result = ISC_R_NOMEMORY;
6747                                 goto unlock;
6748                         }
6749                         set_ttl(rbtdb, newheader, 0);
6750                         newheader->type = topheader->type;
6751                         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6752                         newheader->trust = 0;
6753                         newheader->serial = rbtversion->serial;
6754                         newheader->noqname = NULL;
6755                         newheader->closest = NULL;
6756                         newheader->count = 0;
6757                         newheader->additional_auth = NULL;
6758                         newheader->additional_glue = NULL;
6759                         newheader->node = rbtnode;
6760                         newheader->resign = 0;
6761                         newheader->last_used = 0;
6762                 } else {
6763                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6764                         goto unlock;
6765                 }
6766
6767                 /*
6768                  * If we're here, we want to link newheader in front of
6769                  * topheader.
6770                  */
6771                 INSIST(rbtversion->serial >= topheader->serial);
6772                 if (topheader_prev != NULL)
6773                         topheader_prev->next = newheader;
6774                 else
6775                         rbtnode->data = newheader;
6776                 newheader->next = topheader->next;
6777                 newheader->down = topheader;
6778                 topheader->next = newheader;
6779                 rbtnode->dirty = 1;
6780                 changed->dirty = ISC_TRUE;
6781         } else {
6782                 /*
6783                  * The rdataset doesn't exist, so we don't need to do anything
6784                  * to satisfy the deletion request.
6785                  */
6786                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6787                 if ((options & DNS_DBSUB_EXACT) != 0)
6788                         result = DNS_R_NOTEXACT;
6789                 else
6790                         result = DNS_R_UNCHANGED;
6791         }
6792
6793         if (result == ISC_R_SUCCESS && newrdataset != NULL)
6794                 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6795
6796  unlock:
6797         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6798                     isc_rwlocktype_write);
6799
6800         /*
6801          * Update the zone's secure status.  If version is non-NULL
6802          * this is deferred until closeversion() is called.
6803          */
6804         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6805                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6806
6807         return (result);
6808 }
6809
6810 static isc_result_t
6811 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6812                dns_rdatatype_t type, dns_rdatatype_t covers)
6813 {
6814         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6815         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6816         rbtdb_version_t *rbtversion = version;
6817         isc_result_t result;
6818         rdatasetheader_t *newheader;
6819
6820         REQUIRE(VALID_RBTDB(rbtdb));
6821         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
6822
6823         if (type == dns_rdatatype_any)
6824                 return (ISC_R_NOTIMPLEMENTED);
6825         if (type == dns_rdatatype_rrsig && covers == 0)
6826                 return (ISC_R_NOTIMPLEMENTED);
6827
6828         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6829         if (newheader == NULL)
6830                 return (ISC_R_NOMEMORY);
6831         set_ttl(rbtdb, newheader, 0);
6832         newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6833         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6834         newheader->trust = 0;
6835         newheader->noqname = NULL;
6836         newheader->closest = NULL;
6837         newheader->additional_auth = NULL;
6838         newheader->additional_glue = NULL;
6839         if (rbtversion != NULL)
6840                 newheader->serial = rbtversion->serial;
6841         else
6842                 newheader->serial = 0;
6843         newheader->count = 0;
6844         newheader->last_used = 0;
6845         newheader->node = rbtnode;
6846
6847         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6848                   isc_rwlocktype_write);
6849
6850         result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6851                      ISC_FALSE, NULL, 0);
6852
6853         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6854                     isc_rwlocktype_write);
6855
6856         /*
6857          * Update the zone's secure status.  If version is non-NULL
6858          * this is deferred until closeversion() is called.
6859          */
6860         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6861                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6862
6863         return (result);
6864 }
6865
6866 /*
6867  * load a non-NSEC3 node in the main tree and optionally to the auxiliary NSEC
6868  */
6869 static isc_result_t
6870 loadnode(dns_rbtdb_t *rbtdb, dns_name_t *name, dns_rbtnode_t **nodep,
6871          isc_boolean_t hasnsec)
6872 {
6873         isc_result_t noderesult, nsecresult, tmpresult;
6874         dns_rbtnode_t *nsecnode = NULL, *node = NULL;
6875
6876         noderesult = dns_rbt_addnode(rbtdb->tree, name, &node);
6877         if (!hasnsec)
6878                 goto done;
6879         if (noderesult == ISC_R_EXISTS) {
6880                 /*
6881                  * Add a node to the auxiliary NSEC tree for an old node
6882                  * just now getting an NSEC record.
6883                  */
6884                 if (node->nsec == DNS_RBT_NSEC_HAS_NSEC)
6885                         goto done;
6886         } else if (noderesult != ISC_R_SUCCESS)
6887                 goto done;
6888
6889         /*
6890          * Build the auxiliary tree for NSECs as we go.
6891          * This tree speeds searches for closest NSECs that would otherwise
6892          * need to examine many irrelevant nodes in large TLDs.
6893          *
6894          * Add nodes to the auxiliary tree after corresponding nodes have
6895          * been added to the main tree.
6896          */
6897         nsecresult = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
6898         if (nsecresult == ISC_R_SUCCESS) {
6899                 nsecnode->nsec = DNS_RBT_NSEC_NSEC;
6900                 node->nsec = DNS_RBT_NSEC_HAS_NSEC;
6901                 goto done;
6902         }
6903
6904         if (nsecresult == ISC_R_EXISTS) {
6905 #if 1 /* 0 */
6906                 isc_log_write(dns_lctx,
6907                               DNS_LOGCATEGORY_DATABASE,
6908                               DNS_LOGMODULE_CACHE,
6909                               ISC_LOG_WARNING,
6910                               "addnode: NSEC node already exists");
6911 #endif
6912                 node->nsec = DNS_RBT_NSEC_HAS_NSEC;
6913                 goto done;
6914         }
6915
6916         if (noderesult == ISC_R_SUCCESS) {
6917                 /*
6918                  * Remove the node we just added above.
6919                  */
6920                 tmpresult = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
6921                 if (tmpresult != ISC_R_SUCCESS)
6922                         isc_log_write(dns_lctx,
6923                                       DNS_LOGCATEGORY_DATABASE,
6924                                       DNS_LOGMODULE_CACHE,
6925                                       ISC_LOG_WARNING,
6926                                       "loading_addrdataset: "
6927                                       "dns_rbt_deletenode: %s after "
6928                                       "dns_rbt_addnode(NSEC): %s",
6929                                       isc_result_totext(tmpresult),
6930                                       isc_result_totext(noderesult));
6931
6932         }
6933
6934         /*
6935          * Set the error condition to be returned.
6936          */
6937         noderesult = nsecresult;
6938
6939  done:
6940 #ifdef BIND9
6941         if (noderesult == ISC_R_SUCCESS && rbtdb->rpz_cidr != NULL)
6942                 dns_rpz_cidr_addip(rbtdb->rpz_cidr, name);
6943 #endif
6944         if (noderesult == ISC_R_SUCCESS || noderesult == ISC_R_EXISTS)
6945                 *nodep = node;
6946
6947         return (noderesult);
6948 }
6949
6950 static isc_result_t
6951 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6952         rbtdb_load_t *loadctx = arg;
6953         dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6954         dns_rbtnode_t *node;
6955         isc_result_t result;
6956         isc_region_t region;
6957         rdatasetheader_t *newheader;
6958
6959         /*
6960          * This routine does no node locking.  See comments in
6961          * 'load' below for more information on loading and
6962          * locking.
6963          */
6964
6965
6966         /*
6967          * SOA records are only allowed at top of zone.
6968          */
6969         if (rdataset->type == dns_rdatatype_soa &&
6970             !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6971                 return (DNS_R_NOTZONETOP);
6972
6973         if (rdataset->type != dns_rdatatype_nsec3 &&
6974             rdataset->covers != dns_rdatatype_nsec3)
6975                 add_empty_wildcards(rbtdb, name);
6976
6977         if (dns_name_iswildcard(name)) {
6978                 /*
6979                  * NS record owners cannot legally be wild cards.
6980                  */
6981                 if (rdataset->type == dns_rdatatype_ns)
6982                         return (DNS_R_INVALIDNS);
6983                 /*
6984                  * NSEC3 record owners cannot legally be wild cards.
6985                  */
6986                 if (rdataset->type == dns_rdatatype_nsec3)
6987                         return (DNS_R_INVALIDNSEC3);
6988                 result = add_wildcard_magic(rbtdb, name);
6989                 if (result != ISC_R_SUCCESS)
6990                         return (result);
6991         }
6992
6993         node = NULL;
6994         if (rdataset->type == dns_rdatatype_nsec3 ||
6995             rdataset->covers == dns_rdatatype_nsec3) {
6996                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6997                 if (result == ISC_R_SUCCESS)
6998                         node->nsec = DNS_RBT_NSEC_NSEC3;
6999         } else if (rdataset->type == dns_rdatatype_nsec) {
7000                 result = loadnode(rbtdb, name, &node, ISC_TRUE);
7001         } else {
7002                 result = loadnode(rbtdb, name, &node, ISC_FALSE);
7003         }
7004         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
7005                 return (result);
7006         if (result == ISC_R_SUCCESS) {
7007                 dns_name_t foundname;
7008                 dns_name_init(&foundname, NULL);
7009                 dns_rbt_namefromnode(node, &foundname);
7010 #ifdef DNS_RBT_USEHASH
7011                 node->locknum = node->hashval % rbtdb->node_lock_count;
7012 #else
7013                 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
7014                         rbtdb->node_lock_count;
7015 #endif
7016         }
7017
7018         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
7019                                             &region,
7020                                             sizeof(rdatasetheader_t));
7021         if (result != ISC_R_SUCCESS)
7022                 return (result);
7023         newheader = (rdatasetheader_t *)region.base;
7024         init_rdataset(rbtdb, newheader);
7025         set_ttl(rbtdb, newheader,
7026                 rdataset->ttl + loadctx->now); /* XXX overflow check */
7027         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
7028                                                 rdataset->covers);
7029         newheader->attributes = 0;
7030         newheader->trust = rdataset->trust;
7031         newheader->serial = 1;
7032         newheader->noqname = NULL;
7033         newheader->closest = NULL;
7034         newheader->count = init_count++;
7035         newheader->additional_auth = NULL;
7036         newheader->additional_glue = NULL;
7037         newheader->last_used = 0;
7038         newheader->node = node;
7039         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
7040                 newheader->attributes |= RDATASET_ATTR_RESIGN;
7041                 newheader->resign = rdataset->resign;
7042         } else
7043                 newheader->resign = 0;
7044
7045         result = add(rbtdb, node, rbtdb->current_version, newheader,
7046                      DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
7047         if (result == ISC_R_SUCCESS &&
7048             delegating_type(rbtdb, node, rdataset->type))
7049                 node->find_callback = 1;
7050         else if (result == DNS_R_UNCHANGED)
7051                 result = ISC_R_SUCCESS;
7052
7053         return (result);
7054 }
7055
7056 static isc_result_t
7057 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
7058         rbtdb_load_t *loadctx;
7059         dns_rbtdb_t *rbtdb;
7060
7061         rbtdb = (dns_rbtdb_t *)db;
7062
7063         REQUIRE(VALID_RBTDB(rbtdb));
7064
7065         loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
7066         if (loadctx == NULL)
7067                 return (ISC_R_NOMEMORY);
7068
7069         loadctx->rbtdb = rbtdb;
7070         if (IS_CACHE(rbtdb))
7071                 isc_stdtime_get(&loadctx->now);
7072         else
7073                 loadctx->now = 0;
7074
7075         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7076
7077         REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
7078                 == 0);
7079         rbtdb->attributes |= RBTDB_ATTR_LOADING;
7080
7081         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7082
7083         *addp = loading_addrdataset;
7084         *dbloadp = loadctx;
7085
7086         return (ISC_R_SUCCESS);
7087 }
7088
7089 static isc_result_t
7090 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
7091         rbtdb_load_t *loadctx;
7092         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7093
7094         REQUIRE(VALID_RBTDB(rbtdb));
7095         REQUIRE(dbloadp != NULL);
7096         loadctx = *dbloadp;
7097         REQUIRE(loadctx->rbtdb == rbtdb);
7098
7099         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7100
7101         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
7102         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
7103
7104         rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
7105         rbtdb->attributes |= RBTDB_ATTR_LOADED;
7106
7107         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7108
7109         /*
7110          * If there's a KEY rdataset at the zone origin containing a
7111          * zone key, we consider the zone secure.
7112          */
7113         if (! IS_CACHE(rbtdb))
7114                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
7115
7116         *dbloadp = NULL;
7117
7118         isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
7119
7120         return (ISC_R_SUCCESS);
7121 }
7122
7123 static isc_result_t
7124 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
7125      dns_masterformat_t masterformat) {
7126         dns_rbtdb_t *rbtdb;
7127         rbtdb_version_t *rbtversion = version;
7128
7129         rbtdb = (dns_rbtdb_t *)db;
7130
7131         REQUIRE(VALID_RBTDB(rbtdb));
7132         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
7133
7134 #ifdef BIND9
7135         return (dns_master_dump2(rbtdb->common.mctx, db, version,
7136                                  &dns_master_style_default,
7137                                  filename, masterformat));
7138 #else
7139         UNUSED(version);
7140         UNUSED(filename);
7141         UNUSED(masterformat);
7142
7143         return (ISC_R_NOTIMPLEMENTED);
7144 #endif /* BIND9 */
7145 }
7146
7147 static void
7148 delete_callback(void *data, void *arg) {
7149         dns_rbtdb_t *rbtdb = arg;
7150         rdatasetheader_t *current, *next;
7151         unsigned int locknum;
7152
7153         current = data;
7154         locknum = current->node->locknum;
7155         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
7156         while (current != NULL) {
7157                 next = current->next;
7158                 free_rdataset(rbtdb, rbtdb->common.mctx, current);
7159                 current = next;
7160         }
7161         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
7162 }
7163
7164 static isc_boolean_t
7165 issecure(dns_db_t *db) {
7166         dns_rbtdb_t *rbtdb;
7167         isc_boolean_t secure;
7168
7169         rbtdb = (dns_rbtdb_t *)db;
7170
7171         REQUIRE(VALID_RBTDB(rbtdb));
7172
7173         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7174         secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
7175         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7176
7177         return (secure);
7178 }
7179
7180 static isc_boolean_t
7181 isdnssec(dns_db_t *db) {
7182         dns_rbtdb_t *rbtdb;
7183         isc_boolean_t dnssec;
7184
7185         rbtdb = (dns_rbtdb_t *)db;
7186
7187         REQUIRE(VALID_RBTDB(rbtdb));
7188
7189         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7190         dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
7191         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7192
7193         return (dnssec);
7194 }
7195
7196 static unsigned int
7197 nodecount(dns_db_t *db) {
7198         dns_rbtdb_t *rbtdb;
7199         unsigned int count;
7200
7201         rbtdb = (dns_rbtdb_t *)db;
7202
7203         REQUIRE(VALID_RBTDB(rbtdb));
7204
7205         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7206         count = dns_rbt_nodecount(rbtdb->tree);
7207         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7208
7209         return (count);
7210 }
7211
7212 static void
7213 settask(dns_db_t *db, isc_task_t *task) {
7214         dns_rbtdb_t *rbtdb;
7215
7216         rbtdb = (dns_rbtdb_t *)db;
7217
7218         REQUIRE(VALID_RBTDB(rbtdb));
7219
7220         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7221         if (rbtdb->task != NULL)
7222                 isc_task_detach(&rbtdb->task);
7223         if (task != NULL)
7224                 isc_task_attach(task, &rbtdb->task);
7225         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7226 }
7227
7228 static isc_boolean_t
7229 ispersistent(dns_db_t *db) {
7230         UNUSED(db);
7231         return (ISC_FALSE);
7232 }
7233
7234 static isc_result_t
7235 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
7236         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7237         dns_rbtnode_t *onode;
7238         isc_result_t result = ISC_R_SUCCESS;
7239
7240         REQUIRE(VALID_RBTDB(rbtdb));
7241         REQUIRE(nodep != NULL && *nodep == NULL);
7242
7243         /* Note that the access to origin_node doesn't require a DB lock */
7244         onode = (dns_rbtnode_t *)rbtdb->origin_node;
7245         if (onode != NULL) {
7246                 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
7247                 new_reference(rbtdb, onode);
7248                 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
7249
7250                 *nodep = rbtdb->origin_node;
7251         } else {
7252                 INSIST(IS_CACHE(rbtdb));
7253                 result = ISC_R_NOTFOUND;
7254         }
7255
7256         return (result);
7257 }
7258
7259 static isc_result_t
7260 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
7261                    isc_uint8_t *flags, isc_uint16_t *iterations,
7262                    unsigned char *salt, size_t *salt_length)
7263 {
7264         dns_rbtdb_t *rbtdb;
7265         isc_result_t result = ISC_R_NOTFOUND;
7266         rbtdb_version_t *rbtversion = version;
7267
7268         rbtdb = (dns_rbtdb_t *)db;
7269
7270         REQUIRE(VALID_RBTDB(rbtdb));
7271         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
7272
7273         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7274
7275         if (rbtversion == NULL)
7276                 rbtversion = rbtdb->current_version;
7277
7278         if (rbtversion->havensec3) {
7279                 if (hash != NULL)
7280                         *hash = rbtversion->hash;
7281                 if (salt != NULL && salt_length != NULL) {
7282                         REQUIRE(*salt_length >= rbtversion->salt_length);
7283                         memmove(salt, rbtversion->salt,
7284                                 rbtversion->salt_length);
7285                 }
7286                 if (salt_length != NULL)
7287                         *salt_length = rbtversion->salt_length;
7288                 if (iterations != NULL)
7289                         *iterations = rbtversion->iterations;
7290                 if (flags != NULL)
7291                         *flags = rbtversion->flags;
7292                 result = ISC_R_SUCCESS;
7293         }
7294         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7295
7296         return (result);
7297 }
7298
7299 static isc_result_t
7300 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
7301         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7302         isc_stdtime_t oldresign;
7303         isc_result_t result = ISC_R_SUCCESS;
7304         rdatasetheader_t *header;
7305
7306         REQUIRE(VALID_RBTDB(rbtdb));
7307         REQUIRE(!IS_CACHE(rbtdb));
7308         REQUIRE(rdataset != NULL);
7309
7310         header = rdataset->private3;
7311         header--;
7312
7313         NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
7314                   isc_rwlocktype_write);
7315
7316         oldresign = header->resign;
7317         header->resign = resign;
7318         if (header->heap_index != 0) {
7319                 INSIST(RESIGN(header));
7320                 if (resign == 0) {
7321                         isc_heap_delete(rbtdb->heaps[header->node->locknum],
7322                                         header->heap_index);
7323                         header->heap_index = 0;
7324                 } else if (resign < oldresign)
7325                         isc_heap_increased(rbtdb->heaps[header->node->locknum],
7326                                            header->heap_index);
7327                 else if (resign > oldresign)
7328                         isc_heap_decreased(rbtdb->heaps[header->node->locknum],
7329                                            header->heap_index);
7330         } else if (resign && header->heap_index == 0) {
7331                 header->attributes |= RDATASET_ATTR_RESIGN;
7332                 result = resign_insert(rbtdb, header->node->locknum, header);
7333         }
7334         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
7335                     isc_rwlocktype_write);
7336         return (result);
7337 }
7338
7339 static isc_result_t
7340 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
7341                dns_name_t *foundname)
7342 {
7343         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7344         rdatasetheader_t *header = NULL, *this;
7345         unsigned int i;
7346         isc_result_t result = ISC_R_NOTFOUND;
7347         unsigned int locknum;
7348
7349         REQUIRE(VALID_RBTDB(rbtdb));
7350
7351         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7352
7353         for (i = 0; i < rbtdb->node_lock_count; i++) {
7354                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
7355                 this = isc_heap_element(rbtdb->heaps[i], 1);
7356                 if (this == NULL) {
7357                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
7358                                     isc_rwlocktype_read);
7359                         continue;
7360                 }
7361                 if (header == NULL)
7362                         header = this;
7363                 else if (isc_serial_lt(this->resign, header->resign)) {
7364                         locknum = header->node->locknum;
7365                         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
7366                                     isc_rwlocktype_read);
7367                         header = this;
7368                 } else
7369                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
7370                                     isc_rwlocktype_read);
7371         }
7372
7373         if (header == NULL)
7374                 goto unlock;
7375
7376         bind_rdataset(rbtdb, header->node, header, 0, rdataset);
7377
7378         if (foundname != NULL)
7379                 dns_rbt_fullnamefromnode(header->node, foundname);
7380
7381         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
7382                     isc_rwlocktype_read);
7383
7384         result = ISC_R_SUCCESS;
7385
7386  unlock:
7387         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7388
7389         return (result);
7390 }
7391
7392 static void
7393 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
7394 {
7395         rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
7396         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7397         dns_rbtnode_t *node;
7398         rdatasetheader_t *header;
7399
7400         REQUIRE(VALID_RBTDB(rbtdb));
7401         REQUIRE(rdataset != NULL);
7402         REQUIRE(rdataset->methods == &rdataset_methods);
7403         REQUIRE(rbtdb->future_version == rbtversion);
7404         REQUIRE(rbtversion != NULL);
7405         REQUIRE(rbtversion->writer);
7406         REQUIRE(rbtversion->rbtdb == rbtdb);
7407
7408         node = rdataset->private2;
7409         INSIST(node != NULL);
7410         header = rdataset->private3;
7411         INSIST(header != NULL);
7412         header--;
7413
7414         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7415         NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
7416                   isc_rwlocktype_write);
7417         /*
7418          * Delete from heap and save to re-signed list so that it can
7419          * be restored if we backout of this change.
7420          */
7421         new_reference(rbtdb, node);
7422         isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
7423         header->heap_index = 0;
7424         ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
7425
7426         NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
7427                     isc_rwlocktype_write);
7428         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7429 }
7430
7431 static dns_stats_t *
7432 getrrsetstats(dns_db_t *db) {
7433         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7434
7435         REQUIRE(VALID_RBTDB(rbtdb));
7436         REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
7437
7438         return (rbtdb->rrsetstats);
7439 }
7440
/*
 * Method table for zone and stub databases: the create function installs
 * it as common.methods whenever the requested type is not dns_dbtype_cache.
 *
 * The initializer is positional, so the order of the 40 entries must match
 * the member order of dns_dbmethods_t (declared outside this file) and line
 * up slot for slot with cache_methods.  NULL presumably marks an operation
 * this table does not provide -- confirm against the dns_db dispatch code.
 * The two rpz entries are only filled in when BIND9 is defined.
 */
7441 static dns_dbmethods_t zone_methods = {
7442         attach,
7443         detach,
7444         beginload,
7445         endload,
7446         dump,
7447         currentversion,
7448         newversion,
7449         attachversion,
7450         closeversion,
7451         findnode,
7452         zone_find,
7453         zone_findzonecut,
7454         attachnode,
7455         detachnode,
7456         expirenode,
7457         printnode,
7458         createiterator,
7459         zone_findrdataset,
7460         allrdatasets,
7461         addrdataset,
7462         subtractrdataset,
7463         deleterdataset,
7464         issecure,
7465         nodecount,
7466         ispersistent,
7467         overmem,
7468         settask,
7469         getoriginnode,
7470         NULL,
7471         getnsec3parameters,
7472         findnsec3node,
7473         setsigningtime,
7474         getsigningtime,
7475         resigned,
7476         isdnssec,
7477         NULL,
7478 #ifdef BIND9
7479         rpz_enabled,
7480         rpz_findips,
7481 #else
7482         NULL,
7483         NULL,
7484 #endif
7485         NULL,
7486         NULL
7487 };
7488
/*
 * Method table for cache databases: the create function installs it as
 * common.methods when the requested type is dns_dbtype_cache.
 *
 * The initializer is positional, so the order of the 40 entries must match
 * the member order of dns_dbmethods_t (declared outside this file) and line
 * up slot for slot with zone_methods.  Compared with the zone table it uses
 * the cache_* lookup functions, supplies getrrsetstats, and leaves the
 * NSEC3, re-signing and rpz slots NULL.  NULL presumably marks an operation
 * this table does not provide -- confirm against the dns_db dispatch code.
 */
7489 static dns_dbmethods_t cache_methods = {
7490         attach,
7491         detach,
7492         beginload,
7493         endload,
7494         dump,
7495         currentversion,
7496         newversion,
7497         attachversion,
7498         closeversion,
7499         findnode,
7500         cache_find,
7501         cache_findzonecut,
7502         attachnode,
7503         detachnode,
7504         expirenode,
7505         printnode,
7506         createiterator,
7507         cache_findrdataset,
7508         allrdatasets,
7509         addrdataset,
7510         subtractrdataset,
7511         deleterdataset,
7512         issecure,
7513         nodecount,
7514         ispersistent,
7515         overmem,
7516         settask,
7517         getoriginnode,
7518         NULL,
7519         NULL,
7520         NULL,
7521         NULL,
7522         NULL,
7523         NULL,
7524         isdnssec,
7525         getrrsetstats,
7526         NULL,
7527         NULL,
7528         NULL,
7529         NULL
7530 };
7531
7532 isc_result_t
7533 #ifdef DNS_RBTDB_VERSION64
7534 dns_rbtdb64_create
7535 #else
7536 dns_rbtdb_create
7537 #endif
7538                 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
7539                  dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
7540                  void *driverarg, dns_db_t **dbp)
7541 {
7542         dns_rbtdb_t *rbtdb;
7543         isc_result_t result;
7544         int i;
7545         dns_name_t name;
7546         isc_boolean_t (*sooner)(void *, void *);
7547         isc_mem_t *hmctx = mctx;
7548
7549         /* Keep the compiler happy. */
7550         UNUSED(driverarg);
7551
7552         rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
7553         if (rbtdb == NULL)
7554                 return (ISC_R_NOMEMORY);
7555
7556         /*
7557          * If argv[0] exists, it points to a memory context to use for heap
7558          */
7559         if (argc != 0)
7560                 hmctx = (isc_mem_t *) argv[0];
7561
7562         memset(rbtdb, '\0', sizeof(*rbtdb));
7563         dns_name_init(&rbtdb->common.origin, NULL);
7564         rbtdb->common.attributes = 0;
7565         if (type == dns_dbtype_cache) {
7566                 rbtdb->common.methods = &cache_methods;
7567                 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
7568         } else if (type == dns_dbtype_stub) {
7569                 rbtdb->common.methods = &zone_methods;
7570                 rbtdb->common.attributes |= DNS_DBATTR_STUB;
7571         } else
7572                 rbtdb->common.methods = &zone_methods;
7573         rbtdb->common.rdclass = rdclass;
7574         rbtdb->common.mctx = NULL;
7575
7576         result = RBTDB_INITLOCK(&rbtdb->lock);
7577         if (result != ISC_R_SUCCESS)
7578                 goto cleanup_rbtdb;
7579
7580         result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
7581         if (result != ISC_R_SUCCESS)
7582                 goto cleanup_lock;
7583
7584         /*
7585          * Initialize node_lock_count in a generic way to support future
7586          * extension which allows the user to specify this value on creation.
7587          * Note that when specified for a cache DB it must be larger than 1
7588          * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
7589          */
7590         if (rbtdb->node_lock_count == 0) {
7591                 if (IS_CACHE(rbtdb))
7592                         rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
7593                 else
7594                         rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
7595         } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
7596                 result = ISC_R_RANGE;
7597                 goto cleanup_tree_lock;
7598         }
7599         INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
7600         rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
7601                                         sizeof(rbtdb_nodelock_t));
7602         if (rbtdb->node_locks == NULL) {
7603                 result = ISC_R_NOMEMORY;
7604                 goto cleanup_tree_lock;
7605         }
7606
7607         rbtdb->rrsetstats = NULL;
7608         if (IS_CACHE(rbtdb)) {
7609                 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
7610                 if (result != ISC_R_SUCCESS)
7611                         goto cleanup_node_locks;
7612                 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
7613                                                sizeof(rdatasetheaderlist_t));
7614                 if (rbtdb->rdatasets == NULL) {
7615                         result = ISC_R_NOMEMORY;
7616                         goto cleanup_rrsetstats;
7617                 }
7618                 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7619                         ISC_LIST_INIT(rbtdb->rdatasets[i]);
7620         } else
7621                 rbtdb->rdatasets = NULL;
7622
7623         /*
7624          * Create the heaps.
7625          */
7626         rbtdb->heaps = isc_mem_get(hmctx, rbtdb->node_lock_count *
7627                                    sizeof(isc_heap_t *));
7628         if (rbtdb->heaps == NULL) {
7629                 result = ISC_R_NOMEMORY;
7630                 goto cleanup_rdatasets;
7631         }
7632         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7633                 rbtdb->heaps[i] = NULL;
7634         sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
7635         for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
7636                 result = isc_heap_create(hmctx, sooner, set_index, 0,
7637                                          &rbtdb->heaps[i]);
7638                 if (result != ISC_R_SUCCESS)
7639                         goto cleanup_heaps;
7640         }
7641
7642         /*
7643          * Create deadnode lists.
7644          */
7645         rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
7646                                        sizeof(rbtnodelist_t));
7647         if (rbtdb->deadnodes == NULL) {
7648                 result = ISC_R_NOMEMORY;
7649                 goto cleanup_heaps;
7650         }
7651         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7652                 ISC_LIST_INIT(rbtdb->deadnodes[i]);
7653
7654         rbtdb->active = rbtdb->node_lock_count;
7655
7656         for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
7657                 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
7658                 if (result == ISC_R_SUCCESS) {
7659                         result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
7660                         if (result != ISC_R_SUCCESS)
7661                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7662                 }
7663                 if (result != ISC_R_SUCCESS) {
7664                         while (i-- > 0) {
7665                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7666                                 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7667                                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7668                         }
7669                         goto cleanup_deadnodes;
7670                 }
7671                 rbtdb->node_locks[i].exiting = ISC_FALSE;
7672         }
7673
7674         /*
7675          * Attach to the mctx.  The database will persist so long as there
7676          * are references to it, and attaching to the mctx ensures that our
7677          * mctx won't disappear out from under us.
7678          */
7679         isc_mem_attach(mctx, &rbtdb->common.mctx);
7680         isc_mem_attach(hmctx, &rbtdb->hmctx);
7681
7682         /*
7683          * Must be initialized before free_rbtdb() is called.
7684          */
7685         isc_ondestroy_init(&rbtdb->common.ondest);
7686
7687         /*
7688          * Make a copy of the origin name.
7689          */
7690         result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7691         if (result != ISC_R_SUCCESS) {
7692                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7693                 return (result);
7694         }
7695
7696         /*
7697          * Make the Red-Black Trees.
7698          */
7699         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7700         if (result != ISC_R_SUCCESS) {
7701                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7702                 return (result);
7703         }
7704
7705         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec);
7706         if (result != ISC_R_SUCCESS) {
7707                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7708                 return (result);
7709         }
7710
7711         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7712         if (result != ISC_R_SUCCESS) {
7713                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7714                 return (result);
7715         }
7716
7717         /*
7718          * In order to set the node callback bit correctly in zone databases,
7719          * we need to know if the node has the origin name of the zone.
7720          * In loading_addrdataset() we could simply compare the new name
7721          * to the origin name, but this is expensive.  Also, we don't know the
7722          * node name in addrdataset(), so we need another way of knowing the
7723          * zone's top.
7724          *
7725          * We now explicitly create a node for the zone's origin, and then
7726          * we simply remember the node's address.  This is safe, because
7727          * the top-of-zone node can never be deleted, nor can its address
7728          * change.
7729          */
7730         if (!IS_CACHE(rbtdb)) {
7731                 dns_rbtnode_t *nsec3node;
7732
7733                 rbtdb->origin_node = NULL;
7734                 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7735                                          &rbtdb->origin_node);
7736                 if (result != ISC_R_SUCCESS) {
7737                         INSIST(result != ISC_R_EXISTS);
7738                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7739                         return (result);
7740                 }
7741                 rbtdb->origin_node->nsec = DNS_RBT_NSEC_NORMAL;
7742                 /*
7743                  * We need to give the origin node the right locknum.
7744                  */
7745                 dns_name_init(&name, NULL);
7746                 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7747 #ifdef DNS_RBT_USEHASH
7748                 rbtdb->origin_node->locknum =
7749                         rbtdb->origin_node->hashval %
7750                         rbtdb->node_lock_count;
7751 #else
7752                 rbtdb->origin_node->locknum =
7753                         dns_name_hash(&name, ISC_TRUE) %
7754                         rbtdb->node_lock_count;
7755 #endif
7756                 /*
7757                  * Add an apex node to the NSEC3 tree so that NSEC3 searches
7758                  * return partial matches when there is only a single NSEC3
7759                  * record in the tree.
7760                  */
7761                 nsec3node = NULL;
7762                 result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin,
7763                                          &nsec3node);
7764                 if (result != ISC_R_SUCCESS) {
7765                         INSIST(result != ISC_R_EXISTS);
7766                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7767                         return (result);
7768                 }
7769                 nsec3node->nsec = DNS_RBT_NSEC_NSEC3;
7770                 /*
7771                  * We need to give the nsec3 origin node the right locknum.
7772                  */
7773                 dns_name_init(&name, NULL);
7774                 dns_rbt_namefromnode(nsec3node, &name);
7775 #ifdef DNS_RBT_USEHASH
7776                 nsec3node->locknum = nsec3node->hashval %
7777                         rbtdb->node_lock_count;
7778 #else
7779                 nsec3node->locknum = dns_name_hash(&name, ISC_TRUE) %
7780                         rbtdb->node_lock_count;
7781 #endif
7782         }
7783
7784         /*
7785          * Misc. Initialization.
7786          */
7787         result = isc_refcount_init(&rbtdb->references, 1);
7788         if (result != ISC_R_SUCCESS) {
7789                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7790                 return (result);
7791         }
7792         rbtdb->attributes = 0;
7793         rbtdb->task = NULL;
7794
7795         /*
7796          * Version Initialization.
7797          */
7798         rbtdb->current_serial = 1;
7799         rbtdb->least_serial = 1;
7800         rbtdb->next_serial = 2;
7801         rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7802         if (rbtdb->current_version == NULL) {
7803                 isc_refcount_decrement(&rbtdb->references, NULL);
7804                 isc_refcount_destroy(&rbtdb->references);
7805                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7806                 return (ISC_R_NOMEMORY);
7807         }
7808         rbtdb->current_version->rbtdb = rbtdb;
7809         rbtdb->current_version->secure = dns_db_insecure;
7810         rbtdb->current_version->havensec3 = ISC_FALSE;
7811         rbtdb->current_version->flags = 0;
7812         rbtdb->current_version->iterations = 0;
7813         rbtdb->current_version->hash = 0;
7814         rbtdb->current_version->salt_length = 0;
7815         memset(rbtdb->current_version->salt, 0,
7816                sizeof(rbtdb->current_version->salt));
7817         rbtdb->future_version = NULL;
7818         ISC_LIST_INIT(rbtdb->open_versions);
7819         /*
7820          * Keep the current version in the open list so that list operation
7821          * won't happen in normal lookup operations.
7822          */
7823         PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7824
7825         rbtdb->common.magic = DNS_DB_MAGIC;
7826         rbtdb->common.impmagic = RBTDB_MAGIC;
7827
7828         *dbp = (dns_db_t *)rbtdb;
7829
7830         return (ISC_R_SUCCESS);
7831
7832  cleanup_deadnodes:
7833         isc_mem_put(mctx, rbtdb->deadnodes,
7834                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7835
7836  cleanup_heaps:
7837         if (rbtdb->heaps != NULL) {
7838                 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7839                         if (rbtdb->heaps[i] != NULL)
7840                                 isc_heap_destroy(&rbtdb->heaps[i]);
7841                 isc_mem_put(hmctx, rbtdb->heaps,
7842                             rbtdb->node_lock_count * sizeof(isc_heap_t *));
7843         }
7844
7845  cleanup_rdatasets:
7846         if (rbtdb->rdatasets != NULL)
7847                 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7848                             sizeof(rdatasetheaderlist_t));
7849  cleanup_rrsetstats:
7850         if (rbtdb->rrsetstats != NULL)
7851                 dns_stats_detach(&rbtdb->rrsetstats);
7852
7853  cleanup_node_locks:
7854         isc_mem_put(mctx, rbtdb->node_locks,
7855                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7856
7857  cleanup_tree_lock:
7858         isc_rwlock_destroy(&rbtdb->tree_lock);
7859
7860  cleanup_lock:
7861         RBTDB_DESTROYLOCK(&rbtdb->lock);
7862
7863  cleanup_rbtdb:
7864         isc_mem_put(mctx, rbtdb,  sizeof(*rbtdb));
7865         return (result);
7866 }
7867
7868
7869 /*
7870  * Slabbed Rdataset Methods
7871  */
7872
7873 static void
7874 rdataset_disassociate(dns_rdataset_t *rdataset) {
7875         dns_db_t *db = rdataset->private1;
7876         dns_dbnode_t *node = rdataset->private2;
7877
7878         detachnode(db, &node);
7879 }
7880
7881 static isc_result_t
7882 rdataset_first(dns_rdataset_t *rdataset) {
7883         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7884         unsigned int count;
7885
7886         count = raw[0] * 256 + raw[1];
7887         if (count == 0) {
7888                 rdataset->private5 = NULL;
7889                 return (ISC_R_NOMORE);
7890         }
7891
7892 #if DNS_RDATASET_FIXED
7893         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7894                 raw += 2 + (4 * count);
7895         else
7896 #endif
7897                 raw += 2;
7898
7899         /*
7900          * The privateuint4 field is the number of rdata beyond the
7901          * cursor position, so we decrement the total count by one
7902          * before storing it.
7903          *
7904          * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7905          * first record.  If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7906          * to the first entry in the offset table.
7907          */
7908         count--;
7909         rdataset->privateuint4 = count;
7910         rdataset->private5 = raw;
7911
7912         return (ISC_R_SUCCESS);
7913 }
7914
7915 static isc_result_t
7916 rdataset_next(dns_rdataset_t *rdataset) {
7917         unsigned int count;
7918         unsigned int length;
7919         unsigned char *raw;     /* RDATASLAB */
7920
7921         count = rdataset->privateuint4;
7922         if (count == 0)
7923                 return (ISC_R_NOMORE);
7924         count--;
7925         rdataset->privateuint4 = count;
7926
7927         /*
7928          * Skip forward one record (length + 4) or one offset (4).
7929          */
7930         raw = rdataset->private5;
7931 #if DNS_RDATASET_FIXED
7932         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7933 #endif
7934                 length = raw[0] * 256 + raw[1];
7935                 raw += length;
7936 #if DNS_RDATASET_FIXED
7937         }
7938         rdataset->private5 = raw + 4;           /* length(2) + order(2) */
7939 #else
7940         rdataset->private5 = raw + 2;           /* length(2) */
7941 #endif
7942
7943         return (ISC_R_SUCCESS);
7944 }
7945
7946 static void
7947 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7948         unsigned char *raw = rdataset->private5;        /* RDATASLAB */
7949 #if DNS_RDATASET_FIXED
7950         unsigned int offset;
7951 #endif
7952         unsigned int length;
7953         isc_region_t r;
7954         unsigned int flags = 0;
7955
7956         REQUIRE(raw != NULL);
7957
7958         /*
7959          * Find the start of the record if not already in private5
7960          * then skip the length and order fields.
7961          */
7962 #if DNS_RDATASET_FIXED
7963         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7964                 offset = (raw[0] << 24) + (raw[1] << 16) +
7965                          (raw[2] << 8) + raw[3];
7966                 raw = rdataset->private3;
7967                 raw += offset;
7968         }
7969 #endif
7970         length = raw[0] * 256 + raw[1];
7971 #if DNS_RDATASET_FIXED
7972         raw += 4;
7973 #else
7974         raw += 2;
7975 #endif
7976         if (rdataset->type == dns_rdatatype_rrsig) {
7977                 if (*raw & DNS_RDATASLAB_OFFLINE)
7978                         flags |= DNS_RDATA_OFFLINE;
7979                 length--;
7980                 raw++;
7981         }
7982         r.length = length;
7983         r.base = raw;
7984         dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7985         rdata->flags |= flags;
7986 }
7987
7988 static void
7989 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7990         dns_db_t *db = source->private1;
7991         dns_dbnode_t *node = source->private2;
7992         dns_dbnode_t *cloned_node = NULL;
7993
7994         attachnode(db, node, &cloned_node);
7995         INSIST(!ISC_LINK_LINKED(target, link));
7996         *target = *source;
7997         ISC_LINK_INIT(target, link);
7998
7999         /*
8000          * Reset iterator state.
8001          */
8002         target->privateuint4 = 0;
8003         target->private5 = NULL;
8004 }
8005
8006 static unsigned int
8007 rdataset_count(dns_rdataset_t *rdataset) {
8008         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8009         unsigned int count;
8010
8011         count = raw[0] * 256 + raw[1];
8012
8013         return (count);
8014 }
8015
8016 static isc_result_t
8017 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
8018                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
8019 {
8020         dns_db_t *db = rdataset->private1;
8021         dns_dbnode_t *node = rdataset->private2;
8022         dns_dbnode_t *cloned_node;
8023         struct noqname *noqname = rdataset->private6;
8024
8025         cloned_node = NULL;
8026         attachnode(db, node, &cloned_node);
8027         nsec->methods = &rdataset_methods;
8028         nsec->rdclass = db->rdclass;
8029         nsec->type = noqname->type;
8030         nsec->covers = 0;
8031         nsec->ttl = rdataset->ttl;
8032         nsec->trust = rdataset->trust;
8033         nsec->private1 = rdataset->private1;
8034         nsec->private2 = rdataset->private2;
8035         nsec->private3 = noqname->neg;
8036         nsec->privateuint4 = 0;
8037         nsec->private5 = NULL;
8038         nsec->private6 = NULL;
8039         nsec->private7 = NULL;
8040
8041         cloned_node = NULL;
8042         attachnode(db, node, &cloned_node);
8043         nsecsig->methods = &rdataset_methods;
8044         nsecsig->rdclass = db->rdclass;
8045         nsecsig->type = dns_rdatatype_rrsig;
8046         nsecsig->covers = noqname->type;
8047         nsecsig->ttl = rdataset->ttl;
8048         nsecsig->trust = rdataset->trust;
8049         nsecsig->private1 = rdataset->private1;
8050         nsecsig->private2 = rdataset->private2;
8051         nsecsig->private3 = noqname->negsig;
8052         nsecsig->privateuint4 = 0;
8053         nsecsig->private5 = NULL;
8054         nsec->private6 = NULL;
8055         nsec->private7 = NULL;
8056
8057         dns_name_clone(&noqname->name, name);
8058
8059         return (ISC_R_SUCCESS);
8060 }
8061
8062 static isc_result_t
8063 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
8064                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
8065 {
8066         dns_db_t *db = rdataset->private1;
8067         dns_dbnode_t *node = rdataset->private2;
8068         dns_dbnode_t *cloned_node;
8069         struct noqname *closest = rdataset->private7;
8070
8071         cloned_node = NULL;
8072         attachnode(db, node, &cloned_node);
8073         nsec->methods = &rdataset_methods;
8074         nsec->rdclass = db->rdclass;
8075         nsec->type = closest->type;
8076         nsec->covers = 0;
8077         nsec->ttl = rdataset->ttl;
8078         nsec->trust = rdataset->trust;
8079         nsec->private1 = rdataset->private1;
8080         nsec->private2 = rdataset->private2;
8081         nsec->private3 = closest->neg;
8082         nsec->privateuint4 = 0;
8083         nsec->private5 = NULL;
8084         nsec->private6 = NULL;
8085         nsec->private7 = NULL;
8086
8087         cloned_node = NULL;
8088         attachnode(db, node, &cloned_node);
8089         nsecsig->methods = &rdataset_methods;
8090         nsecsig->rdclass = db->rdclass;
8091         nsecsig->type = dns_rdatatype_rrsig;
8092         nsecsig->covers = closest->type;
8093         nsecsig->ttl = rdataset->ttl;
8094         nsecsig->trust = rdataset->trust;
8095         nsecsig->private1 = rdataset->private1;
8096         nsecsig->private2 = rdataset->private2;
8097         nsecsig->private3 = closest->negsig;
8098         nsecsig->privateuint4 = 0;
8099         nsecsig->private5 = NULL;
8100         nsec->private6 = NULL;
8101         nsec->private7 = NULL;
8102
8103         dns_name_clone(&closest->name, name);
8104
8105         return (ISC_R_SUCCESS);
8106 }
8107
8108 static void
8109 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
8110         dns_rbtdb_t *rbtdb = rdataset->private1;
8111         dns_rbtnode_t *rbtnode = rdataset->private2;
8112         rdatasetheader_t *header = rdataset->private3;
8113
8114         header--;
8115         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8116                   isc_rwlocktype_write);
8117         header->trust = rdataset->trust = trust;
8118         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8119                   isc_rwlocktype_write);
8120 }
8121
8122 static void
8123 rdataset_expire(dns_rdataset_t *rdataset) {
8124         dns_rbtdb_t *rbtdb = rdataset->private1;
8125         dns_rbtnode_t *rbtnode = rdataset->private2;
8126         rdatasetheader_t *header = rdataset->private3;
8127
8128         header--;
8129         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8130                   isc_rwlocktype_write);
8131         expire_header(rbtdb, header, ISC_FALSE);
8132         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8133                   isc_rwlocktype_write);
8134 }
8135
8136 /*
8137  * Rdataset Iterator Methods
8138  */
8139
8140 static void
8141 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
8142         rbtdb_rdatasetiter_t *rbtiterator;
8143
8144         rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
8145
8146         if (rbtiterator->common.version != NULL)
8147                 closeversion(rbtiterator->common.db,
8148                              &rbtiterator->common.version, ISC_FALSE);
8149         detachnode(rbtiterator->common.db, &rbtiterator->common.node);
8150         isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
8151                     sizeof(*rbtiterator));
8152
8153         *iteratorp = NULL;
8154 }
8155
8156 static isc_result_t
8157 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
8158         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8159         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8160         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8161         rbtdb_version_t *rbtversion = rbtiterator->common.version;
8162         rdatasetheader_t *header, *top_next;
8163         rbtdb_serial_t serial;
8164         isc_stdtime_t now;
8165
8166         if (IS_CACHE(rbtdb)) {
8167                 serial = 1;
8168                 now = rbtiterator->common.now;
8169         } else {
8170                 serial = rbtversion->serial;
8171                 now = 0;
8172         }
8173
8174         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8175                   isc_rwlocktype_read);
8176
8177         for (header = rbtnode->data; header != NULL; header = top_next) {
8178                 top_next = header->next;
8179                 do {
8180                         if (header->serial <= serial && !IGNORE(header)) {
8181                                 /*
8182                                  * Is this a "this rdataset doesn't exist"
8183                                  * record?  Or is it too old in the cache?
8184                                  *
8185                                  * Note: unlike everywhere else, we
8186                                  * check for now > header->rdh_ttl instead
8187                                  * of now >= header->rdh_ttl.  This allows
8188                                  * ANY and RRSIG queries for 0 TTL
8189                                  * rdatasets to work.
8190                                  */
8191                                 if (NONEXISTENT(header) ||
8192                                     (now != 0 && now > header->rdh_ttl))
8193                                         header = NULL;
8194                                 break;
8195                         } else
8196                                 header = header->down;
8197                 } while (header != NULL);
8198                 if (header != NULL)
8199                         break;
8200         }
8201
8202         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8203                     isc_rwlocktype_read);
8204
8205         rbtiterator->current = header;
8206
8207         if (header == NULL)
8208                 return (ISC_R_NOMORE);
8209
8210         return (ISC_R_SUCCESS);
8211 }
8212
8213 static isc_result_t
8214 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
8215         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8216         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8217         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8218         rbtdb_version_t *rbtversion = rbtiterator->common.version;
8219         rdatasetheader_t *header, *top_next;
8220         rbtdb_serial_t serial;
8221         isc_stdtime_t now;
8222         rbtdb_rdatatype_t type, negtype;
8223         dns_rdatatype_t rdtype, covers;
8224
8225         header = rbtiterator->current;
8226         if (header == NULL)
8227                 return (ISC_R_NOMORE);
8228
8229         if (IS_CACHE(rbtdb)) {
8230                 serial = 1;
8231                 now = rbtiterator->common.now;
8232         } else {
8233                 serial = rbtversion->serial;
8234                 now = 0;
8235         }
8236
8237         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8238                   isc_rwlocktype_read);
8239
8240         type = header->type;
8241         rdtype = RBTDB_RDATATYPE_BASE(header->type);
8242         if (NEGATIVE(header)) {
8243                 covers = RBTDB_RDATATYPE_EXT(header->type);
8244                 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
8245         } else
8246                 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
8247         for (header = header->next; header != NULL; header = top_next) {
8248                 top_next = header->next;
8249                 /*
8250                  * If not walking back up the down list.
8251                  */
8252                 if (header->type != type && header->type != negtype) {
8253                         do {
8254                                 if (header->serial <= serial &&
8255                                     !IGNORE(header)) {
8256                                         /*
8257                                          * Is this a "this rdataset doesn't
8258                                          * exist" record?
8259                                          *
8260                                          * Note: unlike everywhere else, we
8261                                          * check for now > header->ttl instead
8262                                          * of now >= header->ttl.  This allows
8263                                          * ANY and RRSIG queries for 0 TTL
8264                                          * rdatasets to work.
8265                                          */
8266                                         if ((header->attributes &
8267                                              RDATASET_ATTR_NONEXISTENT) != 0 ||
8268                                             (now != 0 && now > header->rdh_ttl))
8269                                                 header = NULL;
8270                                         break;
8271                                 } else
8272                                         header = header->down;
8273                         } while (header != NULL);
8274                         if (header != NULL)
8275                                 break;
8276                 }
8277         }
8278
8279         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8280                     isc_rwlocktype_read);
8281
8282         rbtiterator->current = header;
8283
8284         if (header == NULL)
8285                 return (ISC_R_NOMORE);
8286
8287         return (ISC_R_SUCCESS);
8288 }
8289
8290 static void
8291 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
8292         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8293         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8294         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8295         rdatasetheader_t *header;
8296
8297         header = rbtiterator->current;
8298         REQUIRE(header != NULL);
8299
8300         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8301                   isc_rwlocktype_read);
8302
8303         bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
8304                       rdataset);
8305
8306         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8307                     isc_rwlocktype_read);
8308 }
8309
8310
8311 /*
8312  * Database Iterator Methods
8313  */
8314
8315 static inline void
8316 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
8317         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8318         dns_rbtnode_t *node = rbtdbiter->node;
8319
8320         if (node == NULL)
8321                 return;
8322
8323         INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
8324         reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
8325 }
8326
8327 static inline void
8328 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
8329         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8330         dns_rbtnode_t *node = rbtdbiter->node;
8331         nodelock_t *lock;
8332
8333         if (node == NULL)
8334                 return;
8335
8336         lock = &rbtdb->node_locks[node->locknum].lock;
8337         NODE_LOCK(lock, isc_rwlocktype_read);
8338         decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
8339                             rbtdbiter->tree_locked, ISC_FALSE);
8340         NODE_UNLOCK(lock, isc_rwlocktype_read);
8341
8342         rbtdbiter->node = NULL;
8343 }
8344
8345 static void
8346 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
8347         dns_rbtnode_t *node;
8348         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8349         isc_boolean_t was_read_locked = ISC_FALSE;
8350         nodelock_t *lock;
8351         int i;
8352
8353         if (rbtdbiter->delete != 0) {
8354                 /*
8355                  * Note that "%d node of %d in tree" can report things like
8356                  * "flush_deletions: 59 nodes of 41 in tree".  This means
8357                  * That some nodes appear on the deletions list more than
8358                  * once.  Only the last occurence will actually be deleted.
8359                  */
8360                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
8361                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
8362                               "flush_deletions: %d nodes of %d in tree",
8363                               rbtdbiter->delete,
8364                               dns_rbt_nodecount(rbtdb->tree));
8365
8366                 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
8367                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8368                         was_read_locked = ISC_TRUE;
8369                 }
8370                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8371                 rbtdbiter->tree_locked = isc_rwlocktype_write;
8372
8373                 for (i = 0; i < rbtdbiter->delete; i++) {
8374                         node = rbtdbiter->deletions[i];
8375                         lock = &rbtdb->node_locks[node->locknum].lock;
8376
8377                         NODE_LOCK(lock, isc_rwlocktype_read);
8378                         decrement_reference(rbtdb, node, 0,
8379                                             isc_rwlocktype_read,
8380                                             rbtdbiter->tree_locked, ISC_FALSE);
8381                         NODE_UNLOCK(lock, isc_rwlocktype_read);
8382                 }
8383
8384                 rbtdbiter->delete = 0;
8385
8386                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8387                 if (was_read_locked) {
8388                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8389                         rbtdbiter->tree_locked = isc_rwlocktype_read;
8390
8391                 } else {
8392                         rbtdbiter->tree_locked = isc_rwlocktype_none;
8393                 }
8394         }
8395 }
8396
8397 static inline void
8398 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
8399         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8400
8401         REQUIRE(rbtdbiter->paused);
8402         REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
8403
8404         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8405         rbtdbiter->tree_locked = isc_rwlocktype_read;
8406
8407         rbtdbiter->paused = ISC_FALSE;
8408 }
8409
8410 static void
8411 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
8412         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
8413         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8414         dns_db_t *db = NULL;
8415
8416         if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
8417                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8418                 rbtdbiter->tree_locked = isc_rwlocktype_none;
8419         } else
8420                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
8421
8422         dereference_iter_node(rbtdbiter);
8423
8424         flush_deletions(rbtdbiter);
8425
8426         dns_db_attach(rbtdbiter->common.db, &db);
8427         dns_db_detach(&rbtdbiter->common.db);
8428
8429         dns_rbtnodechain_reset(&rbtdbiter->chain);
8430         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8431         isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
8432         dns_db_detach(&db);
8433
8434         *iteratorp = NULL;
8435 }
8436
8437 static isc_result_t
8438 dbiterator_first(dns_dbiterator_t *iterator) {
8439         isc_result_t result;
8440         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8441         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8442         dns_name_t *name, *origin;
8443
8444         if (rbtdbiter->result != ISC_R_SUCCESS &&
8445             rbtdbiter->result != ISC_R_NOMORE)
8446                 return (rbtdbiter->result);
8447
8448         if (rbtdbiter->paused)
8449                 resume_iteration(rbtdbiter);
8450
8451         dereference_iter_node(rbtdbiter);
8452
8453         name = dns_fixedname_name(&rbtdbiter->name);
8454         origin = dns_fixedname_name(&rbtdbiter->origin);
8455         dns_rbtnodechain_reset(&rbtdbiter->chain);
8456         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8457
8458         if (rbtdbiter->nsec3only) {
8459                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8460                 result = dns_rbtnodechain_first(rbtdbiter->current,
8461                                                 rbtdb->nsec3, name, origin);
8462         } else {
8463                 rbtdbiter->current = &rbtdbiter->chain;
8464                 result = dns_rbtnodechain_first(rbtdbiter->current,
8465                                                 rbtdb->tree, name, origin);
8466                 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
8467                         rbtdbiter->current = &rbtdbiter->nsec3chain;
8468                         result = dns_rbtnodechain_first(rbtdbiter->current,
8469                                                         rbtdb->nsec3, name,
8470                                                         origin);
8471                 }
8472         }
8473         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
8474                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8475                                                   NULL, &rbtdbiter->node);
8476                 if (result == ISC_R_SUCCESS) {
8477                         rbtdbiter->new_origin = ISC_TRUE;
8478                         reference_iter_node(rbtdbiter);
8479                 }
8480         } else {
8481                 INSIST(result == ISC_R_NOTFOUND);
8482                 result = ISC_R_NOMORE; /* The tree is empty. */
8483         }
8484
8485         rbtdbiter->result = result;
8486
8487         return (result);
8488 }
8489
8490 static isc_result_t
8491 dbiterator_last(dns_dbiterator_t *iterator) {
8492         isc_result_t result;
8493         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8494         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8495         dns_name_t *name, *origin;
8496
8497         if (rbtdbiter->result != ISC_R_SUCCESS &&
8498             rbtdbiter->result != ISC_R_NOMORE)
8499                 return (rbtdbiter->result);
8500
8501         if (rbtdbiter->paused)
8502                 resume_iteration(rbtdbiter);
8503
8504         dereference_iter_node(rbtdbiter);
8505
8506         name = dns_fixedname_name(&rbtdbiter->name);
8507         origin = dns_fixedname_name(&rbtdbiter->origin);
8508         dns_rbtnodechain_reset(&rbtdbiter->chain);
8509         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8510
8511         result = ISC_R_NOTFOUND;
8512         if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
8513                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8514                 result = dns_rbtnodechain_last(rbtdbiter->current,
8515                                                rbtdb->nsec3, name, origin);
8516         }
8517         if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
8518                 rbtdbiter->current = &rbtdbiter->chain;
8519                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8520                                                name, origin);
8521         }
8522         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
8523                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8524                                                   NULL, &rbtdbiter->node);
8525                 if (result == ISC_R_SUCCESS) {
8526                         rbtdbiter->new_origin = ISC_TRUE;
8527                         reference_iter_node(rbtdbiter);
8528                 }
8529         } else {
8530                 INSIST(result == ISC_R_NOTFOUND);
8531                 result = ISC_R_NOMORE; /* The tree is empty. */
8532         }
8533
8534         rbtdbiter->result = result;
8535
8536         return (result);
8537 }
8538
8539 static isc_result_t
8540 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
8541         isc_result_t result, tresult;
8542         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8543         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8544         dns_name_t *iname, *origin;
8545
8546         if (rbtdbiter->result != ISC_R_SUCCESS &&
8547             rbtdbiter->result != ISC_R_NOTFOUND &&
8548             rbtdbiter->result != ISC_R_NOMORE)
8549                 return (rbtdbiter->result);
8550
8551         if (rbtdbiter->paused)
8552                 resume_iteration(rbtdbiter);
8553
8554         dereference_iter_node(rbtdbiter);
8555
8556         iname = dns_fixedname_name(&rbtdbiter->name);
8557         origin = dns_fixedname_name(&rbtdbiter->origin);
8558         dns_rbtnodechain_reset(&rbtdbiter->chain);
8559         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8560
8561         if (rbtdbiter->nsec3only) {
8562                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8563                 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8564                                           &rbtdbiter->node,
8565                                           rbtdbiter->current,
8566                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8567         } else if (rbtdbiter->nonsec3) {
8568                 rbtdbiter->current = &rbtdbiter->chain;
8569                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8570                                           &rbtdbiter->node,
8571                                           rbtdbiter->current,
8572                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8573         } else {
8574                 /*
8575                  * Stay on main chain if not found on either chain.
8576                  */
8577                 rbtdbiter->current = &rbtdbiter->chain;
8578                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8579                                           &rbtdbiter->node,
8580                                           rbtdbiter->current,
8581                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8582                 if (result == DNS_R_PARTIALMATCH) {
8583                         dns_rbtnode_t *node = NULL;
8584                         tresult = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8585                                                   &node, &rbtdbiter->nsec3chain,
8586                                                   DNS_RBTFIND_EMPTYDATA,
8587                                                   NULL, NULL);
8588                         if (tresult == ISC_R_SUCCESS) {
8589                                 rbtdbiter->node = node;
8590                                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8591                                 result = tresult;
8592                         }
8593                 }
8594         }
8595
8596 #if 1
8597         if (result == ISC_R_SUCCESS) {
8598                 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
8599                                                   origin, NULL);
8600                 if (result == ISC_R_SUCCESS) {
8601                         rbtdbiter->new_origin = ISC_TRUE;
8602                         reference_iter_node(rbtdbiter);
8603                 }
8604         } else if (result == DNS_R_PARTIALMATCH) {
8605                 result = ISC_R_NOTFOUND;
8606                 rbtdbiter->node = NULL;
8607         }
8608
8609         rbtdbiter->result = result;
8610 #else
8611         if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
8612                 isc_result_t tresult;
8613                 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
8614                                                    origin, NULL);
8615                 if (tresult == ISC_R_SUCCESS) {
8616                         rbtdbiter->new_origin = ISC_TRUE;
8617                         reference_iter_node(rbtdbiter);
8618                 } else {
8619                         result = tresult;
8620                         rbtdbiter->node = NULL;
8621                 }
8622         } else
8623                 rbtdbiter->node = NULL;
8624
8625         rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
8626                             ISC_R_SUCCESS : result;
8627 #endif
8628
8629         return (result);
8630 }
8631
8632 static isc_result_t
8633 dbiterator_prev(dns_dbiterator_t *iterator) {
8634         isc_result_t result;
8635         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8636         dns_name_t *name, *origin;
8637         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8638
8639         REQUIRE(rbtdbiter->node != NULL);
8640
8641         if (rbtdbiter->result != ISC_R_SUCCESS)
8642                 return (rbtdbiter->result);
8643
8644         if (rbtdbiter->paused)
8645                 resume_iteration(rbtdbiter);
8646
8647         name = dns_fixedname_name(&rbtdbiter->name);
8648         origin = dns_fixedname_name(&rbtdbiter->origin);
8649         result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
8650         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8651             !rbtdbiter->nonsec3 &&
8652             &rbtdbiter->nsec3chain == rbtdbiter->current) {
8653                 rbtdbiter->current = &rbtdbiter->chain;
8654                 dns_rbtnodechain_reset(rbtdbiter->current);
8655                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8656                                                name, origin);
8657                 if (result == ISC_R_NOTFOUND)
8658                         result = ISC_R_NOMORE;
8659         }
8660
8661         dereference_iter_node(rbtdbiter);
8662
8663         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8664                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8665                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8666                                                   NULL, &rbtdbiter->node);
8667         }
8668
8669         if (result == ISC_R_SUCCESS)
8670                 reference_iter_node(rbtdbiter);
8671
8672         rbtdbiter->result = result;
8673
8674         return (result);
8675 }
8676
8677 static isc_result_t
8678 dbiterator_next(dns_dbiterator_t *iterator) {
8679         isc_result_t result;
8680         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8681         dns_name_t *name, *origin;
8682         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8683
8684         REQUIRE(rbtdbiter->node != NULL);
8685
8686         if (rbtdbiter->result != ISC_R_SUCCESS)
8687                 return (rbtdbiter->result);
8688
8689         if (rbtdbiter->paused)
8690                 resume_iteration(rbtdbiter);
8691
8692         name = dns_fixedname_name(&rbtdbiter->name);
8693         origin = dns_fixedname_name(&rbtdbiter->origin);
8694         result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
8695         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8696             !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
8697                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8698                 dns_rbtnodechain_reset(rbtdbiter->current);
8699                 result = dns_rbtnodechain_first(rbtdbiter->current,
8700                                                 rbtdb->nsec3, name, origin);
8701                 if (result == ISC_R_NOTFOUND)
8702                         result = ISC_R_NOMORE;
8703         }
8704
8705         dereference_iter_node(rbtdbiter);
8706
8707         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8708                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8709                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8710                                                   NULL, &rbtdbiter->node);
8711         }
8712         if (result == ISC_R_SUCCESS)
8713                 reference_iter_node(rbtdbiter);
8714
8715         rbtdbiter->result = result;
8716
8717         return (result);
8718 }
8719
8720 static isc_result_t
8721 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
8722                    dns_name_t *name)
8723 {
8724         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8725         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8726         dns_rbtnode_t *node = rbtdbiter->node;
8727         isc_result_t result;
8728         dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8729         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8730
8731         REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8732         REQUIRE(rbtdbiter->node != NULL);
8733
8734         if (rbtdbiter->paused)
8735                 resume_iteration(rbtdbiter);
8736
8737         if (name != NULL) {
8738                 if (rbtdbiter->common.relative_names)
8739                         origin = NULL;
8740                 result = dns_name_concatenate(nodename, origin, name, NULL);
8741                 if (result != ISC_R_SUCCESS)
8742                         return (result);
8743                 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8744                         result = DNS_R_NEWORIGIN;
8745         } else
8746                 result = ISC_R_SUCCESS;
8747
8748         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8749         new_reference(rbtdb, node);
8750         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8751
8752         *nodep = rbtdbiter->node;
8753
8754         if (iterator->cleaning && result == ISC_R_SUCCESS) {
8755                 isc_result_t expire_result;
8756
8757                 /*
8758                  * If the deletion array is full, flush it before trying
8759                  * to expire the current node.  The current node can't
8760                  * fully deleted while the iteration cursor is still on it.
8761                  */
8762                 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8763                         flush_deletions(rbtdbiter);
8764
8765                 expire_result = expirenode(iterator->db, *nodep, 0);
8766
8767                 /*
8768                  * expirenode() currently always returns success.
8769                  */
8770                 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8771                         unsigned int refs;
8772
8773                         rbtdbiter->deletions[rbtdbiter->delete++] = node;
8774                         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8775                         dns_rbtnode_refincrement(node, &refs);
8776                         INSIST(refs != 0);
8777                         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8778                 }
8779         }
8780
8781         return (result);
8782 }
8783
8784 static isc_result_t
8785 dbiterator_pause(dns_dbiterator_t *iterator) {
8786         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8787         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8788
8789         if (rbtdbiter->result != ISC_R_SUCCESS &&
8790             rbtdbiter->result != ISC_R_NOMORE)
8791                 return (rbtdbiter->result);
8792
8793         if (rbtdbiter->paused)
8794                 return (ISC_R_SUCCESS);
8795
8796         rbtdbiter->paused = ISC_TRUE;
8797
8798         if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8799                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8800                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8801                 rbtdbiter->tree_locked = isc_rwlocktype_none;
8802         }
8803
8804         flush_deletions(rbtdbiter);
8805
8806         return (ISC_R_SUCCESS);
8807 }
8808
8809 static isc_result_t
8810 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8811         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8812         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8813
8814         if (rbtdbiter->result != ISC_R_SUCCESS)
8815                 return (rbtdbiter->result);
8816
8817         return (dns_name_copy(origin, name, NULL));
8818 }
8819
8820 /*%
8821  * Additional cache routines.
8822  */
8823 static isc_result_t
8824 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8825                        dns_rdatatype_t qtype, dns_acache_t *acache,
8826                        dns_zone_t **zonep, dns_db_t **dbp,
8827                        dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8828                        dns_name_t *fname, dns_message_t *msg,
8829                        isc_stdtime_t now)
8830 {
8831 #ifndef BIND9
8832         UNUSED(rdataset);
8833         UNUSED(type);
8834         UNUSED(qtype);
8835         UNUSED(acache);
8836         UNUSED(zonep);
8837         UNUSED(dbp);
8838         UNUSED(versionp);
8839         UNUSED(nodep);
8840         UNUSED(fname);
8841         UNUSED(msg);
8842         UNUSED(now);
8843
8844         return (ISC_R_NOTIMPLEMENTED);
8845 #else
8846         dns_rbtdb_t *rbtdb = rdataset->private1;
8847         dns_rbtnode_t *rbtnode = rdataset->private2;
8848         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8849         unsigned int current_count = rdataset->privateuint4;
8850         unsigned int count;
8851         rdatasetheader_t *header;
8852         nodelock_t *nodelock;
8853         unsigned int total_count;
8854         acachectl_t *acarray;
8855         dns_acacheentry_t *entry;
8856         isc_result_t result;
8857
8858         UNUSED(qtype); /* we do not use this value at least for now */
8859         UNUSED(acache);
8860
8861         header = (struct rdatasetheader *)(raw - sizeof(*header));
8862
8863         total_count = raw[0] * 256 + raw[1];
8864         INSIST(total_count > current_count);
8865         count = total_count - current_count - 1;
8866
8867         acarray = NULL;
8868
8869         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8870         NODE_LOCK(nodelock, isc_rwlocktype_read);
8871
8872         switch (type) {
8873         case dns_rdatasetadditional_fromauth:
8874                 acarray = header->additional_auth;
8875                 break;
8876         case dns_rdatasetadditional_fromcache:
8877                 acarray = NULL;
8878                 break;
8879         case dns_rdatasetadditional_fromglue:
8880                 acarray = header->additional_glue;
8881                 break;
8882         default:
8883                 INSIST(0);
8884         }
8885
8886         if (acarray == NULL) {
8887                 if (type != dns_rdatasetadditional_fromcache)
8888                         dns_acache_countquerymiss(acache);
8889                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8890                 return (ISC_R_NOTFOUND);
8891         }
8892
8893         if (acarray[count].entry == NULL) {
8894                 dns_acache_countquerymiss(acache);
8895                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8896                 return (ISC_R_NOTFOUND);
8897         }
8898
8899         entry = NULL;
8900         dns_acache_attachentry(acarray[count].entry, &entry);
8901
8902         NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8903
8904         result = dns_acache_getentry(entry, zonep, dbp, versionp,
8905                                      nodep, fname, msg, now);
8906
8907         dns_acache_detachentry(&entry);
8908
8909         return (result);
8910 }
8911
8912 static void
8913 acache_callback(dns_acacheentry_t *entry, void **arg) {
8914         dns_rbtdb_t *rbtdb;
8915         dns_rbtnode_t *rbtnode;
8916         nodelock_t *nodelock;
8917         acachectl_t *acarray = NULL;
8918         acache_cbarg_t *cbarg;
8919         unsigned int count;
8920
8921         REQUIRE(arg != NULL);
8922         cbarg = *arg;
8923
8924         /*
8925          * The caller must hold the entry lock.
8926          */
8927
8928         rbtdb = (dns_rbtdb_t *)cbarg->db;
8929         rbtnode = (dns_rbtnode_t *)cbarg->node;
8930
8931         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8932         NODE_LOCK(nodelock, isc_rwlocktype_write);
8933
8934         switch (cbarg->type) {
8935         case dns_rdatasetadditional_fromauth:
8936                 acarray = cbarg->header->additional_auth;
8937                 break;
8938         case dns_rdatasetadditional_fromglue:
8939                 acarray = cbarg->header->additional_glue;
8940                 break;
8941         default:
8942                 INSIST(0);
8943         }
8944
8945         count = cbarg->count;
8946         if (acarray != NULL && acarray[count].entry == entry) {
8947                 acarray[count].entry = NULL;
8948                 INSIST(acarray[count].cbarg == cbarg);
8949                 acarray[count].cbarg = NULL;
8950                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8951                 dns_acache_detachentry(&entry);
8952         }
8953
8954         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8955
8956         dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8957         dns_db_detach((dns_db_t **)(void*)&rbtdb);
8958
8959         *arg = NULL;
8960 #endif /* BIND9 */
8961 }
8962
#ifdef BIND9
/*
 * Cancel an acache entry and dispose of its callback argument.
 *
 * If dns_acache_cancelentry() returns true, the db and node references
 * held by the callback argument are released here.  The callback argument
 * is always freed to 'mctx' and '*cbargp' is set to NULL.
 */
static void
acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
                      acache_cbarg_t **cbargp)
{
        acache_cbarg_t *arg;

        REQUIRE(mctx != NULL);
        REQUIRE(entry != NULL);
        REQUIRE(cbargp != NULL && *cbargp != NULL);

        arg = *cbargp;

        if (dns_acache_cancelentry(entry)) {
                dns_db_detachnode(arg->db, &arg->node);
                dns_db_detach(&arg->db);
        }

        isc_mem_put(mctx, arg, sizeof(*arg));
        *cbargp = NULL;
}
#endif /* BIND9 */
8986
8987 static isc_result_t
8988 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8989                        dns_rdatatype_t qtype, dns_acache_t *acache,
8990                        dns_zone_t *zone, dns_db_t *db,
8991                        dns_dbversion_t *version, dns_dbnode_t *node,
8992                        dns_name_t *fname)
8993 {
8994 #ifndef BIND9
8995         UNUSED(rdataset);
8996         UNUSED(type);
8997         UNUSED(qtype);
8998         UNUSED(acache);
8999         UNUSED(zone);
9000         UNUSED(db);
9001         UNUSED(version);
9002         UNUSED(node);
9003         UNUSED(fname);
9004
9005         return (ISC_R_NOTIMPLEMENTED);
9006 #else
9007         dns_rbtdb_t *rbtdb = rdataset->private1;
9008         dns_rbtnode_t *rbtnode = rdataset->private2;
9009         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
9010         unsigned int current_count = rdataset->privateuint4;
9011         rdatasetheader_t *header;
9012         unsigned int total_count, count;
9013         nodelock_t *nodelock;
9014         isc_result_t result;
9015         acachectl_t *acarray;
9016         dns_acacheentry_t *newentry, *oldentry = NULL;
9017         acache_cbarg_t *newcbarg, *oldcbarg = NULL;
9018
9019         UNUSED(qtype);
9020
9021         if (type == dns_rdatasetadditional_fromcache)
9022                 return (ISC_R_SUCCESS);
9023
9024         header = (struct rdatasetheader *)(raw - sizeof(*header));
9025
9026         total_count = raw[0] * 256 + raw[1];
9027         INSIST(total_count > current_count);
9028         count = total_count - current_count - 1; /* should be private data */
9029
9030         newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
9031         if (newcbarg == NULL)
9032                 return (ISC_R_NOMEMORY);
9033         newcbarg->type = type;
9034         newcbarg->count = count;
9035         newcbarg->header = header;
9036         newcbarg->db = NULL;
9037         dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
9038         newcbarg->node = NULL;
9039         dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
9040                           &newcbarg->node);
9041         newentry = NULL;
9042         result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
9043                                         acache_callback, newcbarg, &newentry);
9044         if (result != ISC_R_SUCCESS)
9045                 goto fail;
9046
9047         /* Set cache data in the new entry. */
9048         result = dns_acache_setentry(acache, newentry, zone, db,
9049                                      version, node, fname);
9050         if (result != ISC_R_SUCCESS)
9051                 goto fail;
9052
9053         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
9054         NODE_LOCK(nodelock, isc_rwlocktype_write);
9055
9056         acarray = NULL;
9057         switch (type) {
9058         case dns_rdatasetadditional_fromauth:
9059                 acarray = header->additional_auth;
9060                 break;
9061         case dns_rdatasetadditional_fromglue:
9062                 acarray = header->additional_glue;
9063                 break;
9064         default:
9065                 INSIST(0);
9066         }
9067
9068         if (acarray == NULL) {
9069                 unsigned int i;
9070
9071                 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
9072                                       sizeof(acachectl_t));
9073
9074                 if (acarray == NULL) {
9075                         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9076                         goto fail;
9077                 }
9078
9079                 for (i = 0; i < total_count; i++) {
9080                         acarray[i].entry = NULL;
9081                         acarray[i].cbarg = NULL;
9082                 }
9083         }
9084         switch (type) {
9085         case dns_rdatasetadditional_fromauth:
9086                 header->additional_auth = acarray;
9087                 break;
9088         case dns_rdatasetadditional_fromglue:
9089                 header->additional_glue = acarray;
9090                 break;
9091         default:
9092                 INSIST(0);
9093         }
9094
9095         if (acarray[count].entry != NULL) {
9096                 /*
9097                  * Swap the entry.  Delay cleaning-up the old entry since
9098                  * it would require a node lock.
9099                  */
9100                 oldentry = acarray[count].entry;
9101                 INSIST(acarray[count].cbarg != NULL);
9102                 oldcbarg = acarray[count].cbarg;
9103         }
9104         acarray[count].entry = newentry;
9105         acarray[count].cbarg = newcbarg;
9106
9107         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9108
9109         if (oldentry != NULL) {
9110                 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
9111                 dns_acache_detachentry(&oldentry);
9112         }
9113
9114         return (ISC_R_SUCCESS);
9115
9116  fail:
9117         if (newcbarg != NULL) {
9118                 if (newentry != NULL) {
9119                         acache_cancelentry(rbtdb->common.mctx, newentry,
9120                                            &newcbarg);
9121                         dns_acache_detachentry(&newentry);
9122                 } else {
9123                         dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
9124                         dns_db_detach(&newcbarg->db);
9125                         isc_mem_put(rbtdb->common.mctx, newcbarg,
9126                             sizeof(*newcbarg));
9127                 }
9128         }
9129
9130         return (result);
9131 #endif
9132 }
9133
9134 static isc_result_t
9135 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
9136                        dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
9137 {
9138 #ifndef BIND9
9139         UNUSED(acache);
9140         UNUSED(rdataset);
9141         UNUSED(type);
9142         UNUSED(qtype);
9143
9144         return (ISC_R_NOTIMPLEMENTED);
9145 #else
9146         dns_rbtdb_t *rbtdb = rdataset->private1;
9147         dns_rbtnode_t *rbtnode = rdataset->private2;
9148         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
9149         unsigned int current_count = rdataset->privateuint4;
9150         rdatasetheader_t *header;
9151         nodelock_t *nodelock;
9152         unsigned int total_count, count;
9153         acachectl_t *acarray;
9154         dns_acacheentry_t *entry;
9155         acache_cbarg_t *cbarg;
9156
9157         UNUSED(qtype);          /* we do not use this value at least for now */
9158         UNUSED(acache);
9159
9160         if (type == dns_rdatasetadditional_fromcache)
9161                 return (ISC_R_SUCCESS);
9162
9163         header = (struct rdatasetheader *)(raw - sizeof(*header));
9164
9165         total_count = raw[0] * 256 + raw[1];
9166         INSIST(total_count > current_count);
9167         count = total_count - current_count - 1;
9168
9169         acarray = NULL;
9170         entry = NULL;
9171
9172         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
9173         NODE_LOCK(nodelock, isc_rwlocktype_write);
9174
9175         switch (type) {
9176         case dns_rdatasetadditional_fromauth:
9177                 acarray = header->additional_auth;
9178                 break;
9179         case dns_rdatasetadditional_fromglue:
9180                 acarray = header->additional_glue;
9181                 break;
9182         default:
9183                 INSIST(0);
9184         }
9185
9186         if (acarray == NULL) {
9187                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9188                 return (ISC_R_NOTFOUND);
9189         }
9190
9191         entry = acarray[count].entry;
9192         if (entry == NULL) {
9193                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9194                 return (ISC_R_NOTFOUND);
9195         }
9196
9197         acarray[count].entry = NULL;
9198         cbarg = acarray[count].cbarg;
9199         acarray[count].cbarg = NULL;
9200
9201         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9202
9203         if (entry != NULL) {
9204                 if (cbarg != NULL)
9205                         acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
9206                 dns_acache_detachentry(&entry);
9207         }
9208
9209         return (ISC_R_SUCCESS);
9210 #endif
9211 }
9212
9213 /*%
9214  * Routines for LRU-based cache management.
9215  */
9216
9217 /*%
9218  * See if a given cache entry that is being reused needs to be updated
9219  * in the LRU-list.  From the LRU management point of view, this function is
9220  * expected to return true for almost all cases.  When used with threads,
9221  * however, this may cause a non-negligible performance penalty because a
9222  * writer lock will have to be acquired before updating the list.
9223  * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
9224  * function returns true if the entry has not been updated for some period of
9225  * time.  We differentiate the NS or glue address case and the others since
9226  * experiments have shown that the former tends to be accessed relatively
9227  * infrequently and the cost of cache miss is higher (e.g., a missing NS records
9228  * may cause external queries at a higher level zone, involving more
9229  * transactions).
9230  *
9231  * Caller must hold the node (read or write) lock.
9232  */
9233 static inline isc_boolean_t
9234 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
9235         if ((header->attributes &
9236              (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
9237                 return (ISC_FALSE);
9238
9239 #if DNS_RBTDB_LIMITLRUUPDATE
9240         if (header->type == dns_rdatatype_ns ||
9241             (header->trust == dns_trust_glue &&
9242              (header->type == dns_rdatatype_a ||
9243               header->type == dns_rdatatype_aaaa))) {
9244                 /*
9245                  * Glue records are updated if at least 60 seconds have passed
9246                  * since the previous update time.
9247                  */
9248                 return (header->last_used + 60 <= now);
9249         }
9250
9251         /* Other records are updated if 5 minutes have passed. */
9252         return (header->last_used + 300 <= now);
9253 #else
9254         UNUSED(now);
9255
9256         return (ISC_TRUE);
9257 #endif
9258 }
9259
9260 /*%
9261  * Update the timestamp of a given cache entry and move it to the head
9262  * of the corresponding LRU list.
9263  *
9264  * Caller must hold the node (write) lock.
9265  *
9266  * Note that the we do NOT touch the heap here, as the TTL has not changed.
9267  */
9268 static void
9269 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
9270               isc_stdtime_t now)
9271 {
9272         INSIST(IS_CACHE(rbtdb));
9273
9274         /* To be checked: can we really assume this? XXXMLG */
9275         INSIST(ISC_LINK_LINKED(header, link));
9276
9277         ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
9278         header->last_used = now;
9279         ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
9280 }
9281
9282 /*%
9283  * Purge some expired and/or stale (i.e. unused for some period) cache entries
9284  * under an overmem condition.  To recover from this condition quickly, up to
9285  * 2 entries will be purged.  This process is triggered while adding a new
9286  * entry, and we specifically avoid purging entries in the same LRU bucket as
9287  * the one to which the new entry will belong.  Otherwise, we might purge
9288  * entries of the same name of different RR types while adding RRsets from a
9289  * single response (consider the case where we're adding A and AAAA glue records
9290  * of the same NS name).
9291  */
9292 static void
9293 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
9294               isc_stdtime_t now, isc_boolean_t tree_locked)
9295 {
9296         rdatasetheader_t *header, *header_prev;
9297         unsigned int locknum;
9298         int purgecount = 2;
9299
9300         for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
9301              locknum != locknum_start && purgecount > 0;
9302              locknum = (locknum + 1) % rbtdb->node_lock_count) {
9303                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
9304                           isc_rwlocktype_write);
9305
9306                 header = isc_heap_element(rbtdb->heaps[locknum], 1);
9307                 if (header && header->rdh_ttl < now - RBTDB_VIRTUAL) {
9308                         expire_header(rbtdb, header, tree_locked);
9309                         purgecount--;
9310                 }
9311
9312                 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
9313                      header != NULL && purgecount > 0;
9314                      header = header_prev) {
9315                         header_prev = ISC_LIST_PREV(header, link);
9316                         /*
9317                          * Unlink the entry at this point to avoid checking it
9318                          * again even if it's currently used someone else and
9319                          * cannot be purged at this moment.  This entry won't be
9320                          * referenced any more (so unlinking is safe) since the
9321                          * TTL was reset to 0.
9322                          */
9323                         ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
9324                                         link);
9325                         expire_header(rbtdb, header, tree_locked);
9326                         purgecount--;
9327                 }
9328
9329                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
9330                                     isc_rwlocktype_write);
9331         }
9332 }
9333
9334 static void
9335 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
9336               isc_boolean_t tree_locked)
9337 {
9338         set_ttl(rbtdb, header, 0);
9339         header->attributes |= RDATASET_ATTR_STALE;
9340         header->node->dirty = 1;
9341
9342         /*
9343          * Caller must hold the node (write) lock.
9344          */
9345
9346         if (dns_rbtnode_refcurrent(header->node) == 0) {
9347                 /*
9348                  * If no one else is using the node, we can clean it up now.
9349                  * We first need to gain a new reference to the node to meet a
9350                  * requirement of decrement_reference().
9351                  */
9352                 new_reference(rbtdb, header->node);
9353                 decrement_reference(rbtdb, header->node, 0,
9354                                     isc_rwlocktype_write,
9355                                     tree_locked ? isc_rwlocktype_write :
9356                                     isc_rwlocktype_none, ISC_FALSE);
9357         }
9358 }