]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/bind9/lib/dns/rbtdb.c
Upgrade to version 9.8.0-P4
[FreeBSD/FreeBSD.git] / contrib / bind9 / lib / dns / rbtdb.c
1 /*
2  * Copyright (C) 2004-2011  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id: rbtdb.c,v 1.310.8.1.2.1 2011-06-21 20:15:48 each Exp $ */
19
20 /*! \file */
21
22 /*
23  * Principal Author: Bob Halley
24  */
25
26 #include <config.h>
27
28 /* #define inline */
29
30 #include <isc/event.h>
31 #include <isc/heap.h>
32 #include <isc/mem.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
41 #include <isc/task.h>
42 #include <isc/time.h>
43 #include <isc/util.h>
44
45 #include <dns/acache.h>
46 #include <dns/db.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
50 #include <dns/lib.h>
51 #include <dns/log.h>
52 #include <dns/masterdump.h>
53 #include <dns/nsec.h>
54 #include <dns/nsec3.h>
55 #include <dns/rbt.h>
56 #include <dns/rpz.h>
57 #include <dns/rdata.h>
58 #include <dns/rdataset.h>
59 #include <dns/rdatasetiter.h>
60 #include <dns/rdataslab.h>
61 #include <dns/rdatastruct.h>
62 #include <dns/result.h>
63 #include <dns/stats.h>
64 #include <dns/view.h>
65 #include <dns/zone.h>
66 #include <dns/zonekey.h>
67
68 #ifdef DNS_RBTDB_VERSION64
69 #include "rbtdb64.h"
70 #else
71 #include "rbtdb.h"
72 #endif
73
74 #ifdef DNS_RBTDB_VERSION64
75 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '8')
76 #else
77 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '4')
78 #endif
79
80 /*%
81  * Note that "impmagic" is not the first four bytes of the struct, so
82  * ISC_MAGIC_VALID cannot be used.
83  */
84 #define VALID_RBTDB(rbtdb)      ((rbtdb) != NULL && \
85                                  (rbtdb)->common.impmagic == RBTDB_MAGIC)
86
87 #ifdef DNS_RBTDB_VERSION64
88 typedef isc_uint64_t                    rbtdb_serial_t;
89 /*%
90  * Make casting easier in symbolic debuggers by using different names
91  * for the 64 bit version.
92  */
93 #define dns_rbtdb_t dns_rbtdb64_t
94 #define rdatasetheader_t rdatasetheader64_t
95 #define rbtdb_version_t rbtdb_version64_t
96 #else
97 typedef isc_uint32_t                    rbtdb_serial_t;
98 #endif
99
100 typedef isc_uint32_t                    rbtdb_rdatatype_t;
101
/*
 * An rbtdb_rdatatype_t packs two rdata type values into one integer:
 * RBTDB_RDATATYPE_BASE() extracts the low 16 bits and
 * RBTDB_RDATATYPE_EXT() extracts the bits above them.
 */
#define RBTDB_RDATATYPE_BASE(type)      ((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)       ((dns_rdatatype_t)((type) >> 16))
/*
 * Build a packed value from base type 'b' and extended type 'e'.
 *
 * 'e' is converted to rbtdb_rdatatype_t *before* it is shifted.  If the
 * shift were applied first, an argument narrower than int would be
 * promoted to (signed) int, and shifting a value >= 0x8000 left by 16
 * would overflow int, which is undefined behaviour.
 */
#define RBTDB_RDATATYPE_VALUE(b, e) \
        ((rbtdb_rdatatype_t)(((rbtdb_rdatatype_t)(e) << 16) | \
                             (rbtdb_rdatatype_t)(b)))
105
106 #define RBTDB_RDATATYPE_SIGNSEC \
107                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
108 #define RBTDB_RDATATYPE_SIGNSEC3 \
109                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
110 #define RBTDB_RDATATYPE_SIGNS \
111                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
112 #define RBTDB_RDATATYPE_SIGCNAME \
113                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
114 #define RBTDB_RDATATYPE_SIGDNAME \
115                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
116 #define RBTDB_RDATATYPE_NCACHEANY \
117                 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
118
119 /*
120  * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
121  * Using rwlock is effective with regard to lookup performance only when
122  * it is implemented in an efficient way.
123  * Otherwise, it is generally wise to stick to the simple locking since rwlock
124  * would require more memory or can even make lookups slower due to its own
125  * overhead (when it internally calls mutex locks).
126  */
127 #ifdef ISC_RWLOCK_USEATOMIC
128 #define DNS_RBTDB_USERWLOCK 1
129 #else
130 #define DNS_RBTDB_USERWLOCK 0
131 #endif
132
133 #if DNS_RBTDB_USERWLOCK
134 #define RBTDB_INITLOCK(l)       isc_rwlock_init((l), 0, 0)
135 #define RBTDB_DESTROYLOCK(l)    isc_rwlock_destroy(l)
136 #define RBTDB_LOCK(l, t)        RWLOCK((l), (t))
137 #define RBTDB_UNLOCK(l, t)      RWUNLOCK((l), (t))
138 #else
139 #define RBTDB_INITLOCK(l)       isc_mutex_init(l)
140 #define RBTDB_DESTROYLOCK(l)    DESTROYLOCK(l)
141 #define RBTDB_LOCK(l, t)        LOCK(l)
142 #define RBTDB_UNLOCK(l, t)      UNLOCK(l)
143 #endif
144
145 /*
146  * Since node locking is sensitive to both performance and memory footprint,
147  * we need some trick here.  If we have both high-performance rwlock and
148  * high performance and small-memory reference counters, we use rwlock for
149  * node lock and isc_refcount for node references.  In this case, we don't have
150  * to protect the access to the counters by locks.
151  * Otherwise, we simply use ordinary mutex lock for node locking, and use
152  * simple integers as reference counters which is protected by the lock.
153  * In most cases, we can simply use wrapper macros such as NODE_LOCK and
154  * NODE_UNLOCK.  In some other cases, however, we need to protect reference
155  * counters first and then protect other parts of a node as read-only data.
156  * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
157  * provided for these special cases.  When we can use the efficient backend
158  * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
159  * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
160  * section including the access to the reference counter.
161  * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
162  * section is also protected by NODE_STRONGLOCK().
163  */
164 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
165 typedef isc_rwlock_t nodelock_t;
166
167 #define NODE_INITLOCK(l)        isc_rwlock_init((l), 0, 0)
168 #define NODE_DESTROYLOCK(l)     isc_rwlock_destroy(l)
169 #define NODE_LOCK(l, t)         RWLOCK((l), (t))
170 #define NODE_UNLOCK(l, t)       RWUNLOCK((l), (t))
171 #define NODE_TRYUPGRADE(l)      isc_rwlock_tryupgrade(l)
172
173 #define NODE_STRONGLOCK(l)      ((void)0)
174 #define NODE_STRONGUNLOCK(l)    ((void)0)
175 #define NODE_WEAKLOCK(l, t)     NODE_LOCK(l, t)
176 #define NODE_WEAKUNLOCK(l, t)   NODE_UNLOCK(l, t)
177 #define NODE_WEAKDOWNGRADE(l)   isc_rwlock_downgrade(l)
178 #else
179 typedef isc_mutex_t nodelock_t;
180
181 #define NODE_INITLOCK(l)        isc_mutex_init(l)
182 #define NODE_DESTROYLOCK(l)     DESTROYLOCK(l)
183 #define NODE_LOCK(l, t)         LOCK(l)
184 #define NODE_UNLOCK(l, t)       UNLOCK(l)
185 #define NODE_TRYUPGRADE(l)      ISC_R_SUCCESS
186
187 #define NODE_STRONGLOCK(l)      LOCK(l)
188 #define NODE_STRONGUNLOCK(l)    UNLOCK(l)
189 #define NODE_WEAKLOCK(l, t)     ((void)0)
190 #define NODE_WEAKUNLOCK(l, t)   ((void)0)
191 #define NODE_WEAKDOWNGRADE(l)   ((void)0)
192 #endif
193
194 /*%
195  * Whether to rate-limit updating the LRU to avoid possible thread contention.
196  * Our performance measurement has shown the cost is marginal, so it's defined
197  * to be 0 by default either with or without threads.
198  */
199 #ifndef DNS_RBTDB_LIMITLRUUPDATE
200 #define DNS_RBTDB_LIMITLRUUPDATE 0
201 #endif
202
203 /*
204  * Allow clients with a virtual time of up to 5 minutes in the past to see
205  * records that would otherwise have expired.
206  */
207 #define RBTDB_VIRTUAL 300
208
209 struct noqname {
210         dns_name_t      name;
211         void *          neg;
212         void *          negsig;
213         dns_rdatatype_t type;
214 };
215
216 typedef struct acachectl acachectl_t;
217
218 typedef struct rdatasetheader {
219         /*%
220          * Locked by the owning node's lock.
221          */
222         rbtdb_serial_t                  serial;
223         dns_ttl_t                       rdh_ttl;
224         rbtdb_rdatatype_t               type;
225         isc_uint16_t                    attributes;
226         dns_trust_t                     trust;
227         struct noqname                  *noqname;
228         struct noqname                  *closest;
229         /*%<
230          * We don't use the LIST macros, because the LIST structure has
231          * both head and tail pointers, and is doubly linked.
232          */
233
234         struct rdatasetheader           *next;
235         /*%<
236          * If this is the top header for an rdataset, 'next' points
237          * to the top header for the next rdataset (i.e., the next type).
238          * Otherwise, it points up to the header whose down pointer points
239          * at this header.
240          */
241
242         struct rdatasetheader           *down;
243         /*%<
244          * Points to the header for the next older version of
245          * this rdataset.
246          */
247
248         isc_uint32_t                    count;
249         /*%<
250          * Monotonously increased every time this rdataset is bound so that
251          * it is used as the base of the starting point in DNS responses
252          * when the "cyclic" rrset-order is required.  Since the ordering
253          * should not be so crucial, no lock is set for the counter for
254          * performance reasons.
255          */
256
257         acachectl_t                     *additional_auth;
258         acachectl_t                     *additional_glue;
259
260         dns_rbtnode_t                   *node;
261         isc_stdtime_t                   last_used;
262         ISC_LINK(struct rdatasetheader) link;
263
264         unsigned int                    heap_index;
265         /*%<
266          * Used for TTL-based cache cleaning.
267          */
268         isc_stdtime_t                   resign;
269 } rdatasetheader_t;
270
271 typedef ISC_LIST(rdatasetheader_t)      rdatasetheaderlist_t;
272 typedef ISC_LIST(dns_rbtnode_t)         rbtnodelist_t;
273
274 #define RDATASET_ATTR_NONEXISTENT       0x0001
275 #define RDATASET_ATTR_STALE             0x0002
276 #define RDATASET_ATTR_IGNORE            0x0004
277 #define RDATASET_ATTR_RETAIN            0x0008
278 #define RDATASET_ATTR_NXDOMAIN          0x0010
279 #define RDATASET_ATTR_RESIGN            0x0020
280 #define RDATASET_ATTR_STATCOUNT         0x0040
281 #define RDATASET_ATTR_OPTOUT            0x0080
282 #define RDATASET_ATTR_NEGATIVE          0x0100
283
284 typedef struct acache_cbarg {
285         dns_rdatasetadditional_t        type;
286         unsigned int                    count;
287         dns_db_t                        *db;
288         dns_dbnode_t                    *node;
289         rdatasetheader_t                *header;
290 } acache_cbarg_t;
291
292 struct acachectl {
293         dns_acacheentry_t               *entry;
294         acache_cbarg_t                  *cbarg;
295 };
296
297 /*
298  * XXX
299  * When the cache will pre-expire data (due to memory low or other
300  * situations) before the rdataset's TTL has expired, it MUST
301  * respect the RETAIN bit and not expire the data until its TTL is
302  * expired.
303  */
304
305 #undef IGNORE                   /* WIN32 winbase.h defines this. */
306
/*
 * Predicates on the 'attributes' bit field of an rdataset header.  Each
 * evaluates to 1 or 0.
 */
#define RBTDB_HEADER_HASATTR(header, bit) \
        (((header)->attributes & (bit)) != 0)

#define EXISTS(header)          (!RBTDB_HEADER_HASATTR(header, RDATASET_ATTR_NONEXISTENT))
#define NONEXISTENT(header)     RBTDB_HEADER_HASATTR(header, RDATASET_ATTR_NONEXISTENT)
#define IGNORE(header)          RBTDB_HEADER_HASATTR(header, RDATASET_ATTR_IGNORE)
#define RETAIN(header)          RBTDB_HEADER_HASATTR(header, RDATASET_ATTR_RETAIN)
#define NXDOMAIN(header)        RBTDB_HEADER_HASATTR(header, RDATASET_ATTR_NXDOMAIN)
#define RESIGN(header)          RBTDB_HEADER_HASATTR(header, RDATASET_ATTR_RESIGN)
#define OPTOUT(header)          RBTDB_HEADER_HASATTR(header, RDATASET_ATTR_OPTOUT)
#define NEGATIVE(header)        RBTDB_HEADER_HASATTR(header, RDATASET_ATTR_NEGATIVE)
323
324 #define DEFAULT_NODE_LOCK_COUNT         7       /*%< Should be prime. */
325
326 /*%
327  * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
328  * There is a tradeoff issue about configuring this value: if this is too
329  * small, it may cause heavier contention between threads; if this is too large,
330  * LRU purge algorithm won't work well (entries tend to be purged prematurely).
331  * The default value should work well for most environments, but this can
332  * also be configurable at compilation time via the
333  * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable.  This value must be larger than
334  * 1 due to the assumption of overmem_purge().
335  */
336 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
337 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
338 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
339 #else
340 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
341 #endif
342 #else
343 #define DEFAULT_CACHE_NODE_LOCK_COUNT   16
344 #endif  /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
345
346 typedef struct {
347         nodelock_t                      lock;
348         /* Protected in the refcount routines. */
349         isc_refcount_t                  references;
350         /* Locked by lock. */
351         isc_boolean_t                   exiting;
352 } rbtdb_nodelock_t;
353
354 typedef struct rbtdb_changed {
355         dns_rbtnode_t *                 node;
356         isc_boolean_t                   dirty;
357         ISC_LINK(struct rbtdb_changed)  link;
358 } rbtdb_changed_t;
359
360 typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
361
/*
 * Values for the 'secure' member of a database version.
 */
typedef enum {
        dns_db_insecure = 0,
        dns_db_partial = 1,
        dns_db_secure = 2
} dns_db_secure_t;
367
368 typedef struct rbtdb_version {
369         /* Not locked */
370         rbtdb_serial_t                  serial;
371         /*
372          * Protected in the refcount routines.
373          * XXXJT: should we change the lock policy based on the refcount
374          * performance?
375          */
376         isc_refcount_t                  references;
377         /* Locked by database lock. */
378         isc_boolean_t                   writer;
379         isc_boolean_t                   commit_ok;
380         rbtdb_changedlist_t             changed_list;
381         rdatasetheaderlist_t            resigned_list;
382         ISC_LINK(struct rbtdb_version)  link;
383         dns_db_secure_t                 secure;
384         isc_boolean_t                   havensec3;
385         /* NSEC3 parameters */
386         dns_hash_t                      hash;
387         isc_uint8_t                     flags;
388         isc_uint16_t                    iterations;
389         isc_uint8_t                     salt_length;
390         unsigned char                   salt[DNS_NSEC3_SALTSIZE];
391 } rbtdb_version_t;
392
393 typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;
394
395 typedef struct {
396         /* Unlocked. */
397         dns_db_t                        common;
398         /* Locks the data in this struct */
399 #if DNS_RBTDB_USERWLOCK
400         isc_rwlock_t                    lock;
401 #else
402         isc_mutex_t                     lock;
403 #endif
404         /* Locks the tree structure (prevents nodes appearing/disappearing) */
405         isc_rwlock_t                    tree_lock;
406         /* Locks for individual tree nodes */
407         unsigned int                    node_lock_count;
408         rbtdb_nodelock_t *              node_locks;
409         dns_rbtnode_t *                 origin_node;
410         dns_stats_t *                   rrsetstats; /* cache DB only */
411         /* Locked by lock. */
412         unsigned int                    active;
413         isc_refcount_t                  references;
414         unsigned int                    attributes;
415         rbtdb_serial_t                  current_serial;
416         rbtdb_serial_t                  least_serial;
417         rbtdb_serial_t                  next_serial;
418         rbtdb_version_t *               current_version;
419         rbtdb_version_t *               future_version;
420         rbtdb_versionlist_t             open_versions;
421         isc_task_t *                    task;
422         dns_dbnode_t                    *soanode;
423         dns_dbnode_t                    *nsnode;
424
425         /*
426          * This is a linked list used to implement the LRU cache.  There will
427          * be node_lock_count linked lists here.  Nodes in bucket 1 will be
428          * placed on the linked list rdatasets[1].
429          */
430         rdatasetheaderlist_t            *rdatasets;
431
432         /*%
433          * Temporary storage for stale cache nodes and dynamically deleted
434          * nodes that await being cleaned up.
435          */
436         rbtnodelist_t                   *deadnodes;
437
438         /*
439          * Heaps.  Each of these is used for TTL based expiry.
440          */
441         isc_heap_t                      **heaps;
442
443         /* Locked by tree_lock. */
444         dns_rbt_t *                     tree;
445         dns_rbt_t *                     nsec;
446         dns_rbt_t *                     nsec3;
447         dns_rpz_cidr_t *                rpz_cidr;
448
449         /* Unlocked */
450         unsigned int                    quantum;
451 } dns_rbtdb_t;
452
453 #define RBTDB_ATTR_LOADED               0x01
454 #define RBTDB_ATTR_LOADING              0x02
455
456 /*%
457  * Search Context
458  */
459 typedef struct {
460         dns_rbtdb_t *           rbtdb;
461         rbtdb_version_t *       rbtversion;
462         rbtdb_serial_t          serial;
463         unsigned int            options;
464         dns_rbtnodechain_t      chain;
465         isc_boolean_t           copy_name;
466         isc_boolean_t           need_cleanup;
467         isc_boolean_t           wild;
468         dns_rbtnode_t *         zonecut;
469         rdatasetheader_t *      zonecut_rdataset;
470         rdatasetheader_t *      zonecut_sigrdataset;
471         dns_fixedname_t         zonecut_name;
472         isc_stdtime_t           now;
473 } rbtdb_search_t;
474
475 /*%
476  * Load Context
477  */
478 typedef struct {
479         dns_rbtdb_t *           rbtdb;
480         isc_stdtime_t           now;
481 } rbtdb_load_t;
482
483 static void rdataset_disassociate(dns_rdataset_t *rdataset);
484 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
485 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
486 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
487 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
488 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
489 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
490                                         dns_name_t *name,
491                                         dns_rdataset_t *neg,
492                                         dns_rdataset_t *negsig);
493 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
494                                         dns_name_t *name,
495                                         dns_rdataset_t *neg,
496                                         dns_rdataset_t *negsig);
497 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
498                                            dns_rdatasetadditional_t type,
499                                            dns_rdatatype_t qtype,
500                                            dns_acache_t *acache,
501                                            dns_zone_t **zonep,
502                                            dns_db_t **dbp,
503                                            dns_dbversion_t **versionp,
504                                            dns_dbnode_t **nodep,
505                                            dns_name_t *fname,
506                                            dns_message_t *msg,
507                                            isc_stdtime_t now);
508 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
509                                            dns_rdatasetadditional_t type,
510                                            dns_rdatatype_t qtype,
511                                            dns_acache_t *acache,
512                                            dns_zone_t *zone,
513                                            dns_db_t *db,
514                                            dns_dbversion_t *version,
515                                            dns_dbnode_t *node,
516                                            dns_name_t *fname);
517 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
518                                            dns_rdataset_t *rdataset,
519                                            dns_rdatasetadditional_t type,
520                                            dns_rdatatype_t qtype);
521 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
522                                               isc_stdtime_t now);
523 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
524                           isc_stdtime_t now);
525 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
526                           isc_boolean_t tree_locked);
527 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
528                           isc_stdtime_t now, isc_boolean_t tree_locked);
529 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
530                                   rdatasetheader_t *newheader);
531 static void prune_tree(isc_task_t *task, isc_event_t *event);
532 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
533 static void rdataset_expire(dns_rdataset_t *rdataset);
534
535 static dns_rdatasetmethods_t rdataset_methods = {
536         rdataset_disassociate,
537         rdataset_first,
538         rdataset_next,
539         rdataset_current,
540         rdataset_clone,
541         rdataset_count,
542         NULL,
543         rdataset_getnoqname,
544         NULL,
545         rdataset_getclosest,
546         rdataset_getadditional,
547         rdataset_setadditional,
548         rdataset_putadditional,
549         rdataset_settrust,
550         rdataset_expire
551 };
552
553 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
554 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
555 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
556 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
557                                  dns_rdataset_t *rdataset);
558
559 static dns_rdatasetitermethods_t rdatasetiter_methods = {
560         rdatasetiter_destroy,
561         rdatasetiter_first,
562         rdatasetiter_next,
563         rdatasetiter_current
564 };
565
566 typedef struct rbtdb_rdatasetiter {
567         dns_rdatasetiter_t              common;
568         rdatasetheader_t *              current;
569 } rbtdb_rdatasetiter_t;
570
571 static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
572 static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
573 static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
574 static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
575                                         dns_name_t *name);
576 static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
577 static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
578 static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
579                                            dns_dbnode_t **nodep,
580                                            dns_name_t *name);
581 static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
582 static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
583                                           dns_name_t *name);
584
585 static dns_dbiteratormethods_t dbiterator_methods = {
586         dbiterator_destroy,
587         dbiterator_first,
588         dbiterator_last,
589         dbiterator_seek,
590         dbiterator_prev,
591         dbiterator_next,
592         dbiterator_current,
593         dbiterator_pause,
594         dbiterator_origin
595 };
596
597 #define DELETION_BATCH_MAX 64
598
599 /*
600  * If 'paused' is ISC_TRUE, then the tree lock is not being held.
601  */
602 typedef struct rbtdb_dbiterator {
603         dns_dbiterator_t                common;
604         isc_boolean_t                   paused;
605         isc_boolean_t                   new_origin;
606         isc_rwlocktype_t                tree_locked;
607         isc_result_t                    result;
608         dns_fixedname_t                 name;
609         dns_fixedname_t                 origin;
610         dns_rbtnodechain_t              chain;
611         dns_rbtnodechain_t              nsec3chain;
612         dns_rbtnodechain_t              *current;
613         dns_rbtnode_t                   *node;
614         dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX];
615         int                             delete;
616         isc_boolean_t                   nsec3only;
617         isc_boolean_t                   nonsec3;
618 } rbtdb_dbiterator_t;
619
620
621 #define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
622 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
623
624 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
625                        isc_event_t *event);
626 static void overmem(dns_db_t *db, isc_boolean_t overmem);
627 #ifdef BIND9
628 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version);
629 #endif
630
631 /*%
632  * 'init_count' is used to initialize 'newheader->count' which in turn
633  * is used to determine where in the cycle rrset-order cyclic starts.
634  * We don't lock this as we don't care about simultaneous updates.
635  *
636  * Note:
637  *      Both init_count and header->count can be ISC_UINT32_MAX.
638  *      The count on the returned rdataset however can't be as
639  *      that indicates that the database does not implement cyclic
640  *      processing.
641  */
642 static unsigned int init_count;
643
644 /*
645  * Locking
646  *
647  * If a routine is going to lock more than one lock in this module, then
648  * the locking must be done in the following order:
649  *
650  *      Tree Lock
651  *
652  *      Node Lock       (Only one from the set may be locked at one time by
653  *                       any caller)
654  *
655  *      Database Lock
656  *
657  * Failure to follow this hierarchy can result in deadlock.
658  */
659
660 /*
661  * Deleting Nodes
662  *
663  * For zone databases the node for the origin of the zone MUST NOT be deleted.
664  */
665
666
667 /*
668  * DB Routines
669  */
670
671 static void
672 attach(dns_db_t *source, dns_db_t **targetp) {
673         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
674
675         REQUIRE(VALID_RBTDB(rbtdb));
676
677         isc_refcount_increment(&rbtdb->references, NULL);
678
679         *targetp = source;
680 }
681
682 static void
683 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
684         dns_rbtdb_t *rbtdb = event->ev_arg;
685
686         UNUSED(task);
687
688         free_rbtdb(rbtdb, ISC_TRUE, event);
689 }
690
691 static void
692 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
693                   isc_boolean_t increment)
694 {
695         dns_rdatastatstype_t statattributes = 0;
696         dns_rdatastatstype_t base = 0;
697         dns_rdatastatstype_t type;
698
699         /* At the moment we count statistics only for cache DB */
700         INSIST(IS_CACHE(rbtdb));
701
702         if (NEGATIVE(header)) {
703                 if (NXDOMAIN(header))
704                         statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
705                 else {
706                         statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
707                         base = RBTDB_RDATATYPE_EXT(header->type);
708                 }
709         } else
710                 base = RBTDB_RDATATYPE_BASE(header->type);
711
712         type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
713         if (increment)
714                 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
715         else
716                 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
717 }
718
719 static void
720 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
721         int idx;
722         isc_heap_t *heap;
723         dns_ttl_t oldttl;
724
725         oldttl = header->rdh_ttl;
726         header->rdh_ttl = newttl;
727
728         if (!IS_CACHE(rbtdb))
729                 return;
730
731         /*
732          * It's possible the rbtdb is not a cache.  If this is the case,
733          * we will not have a heap, and we move on.  If we do, though,
734          * we might need to adjust things.
735          */
736         if (header->heap_index == 0 || newttl == oldttl)
737                 return;
738         idx = header->node->locknum;
739         if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
740             return;
741         heap = rbtdb->heaps[idx];
742
743         if (newttl < oldttl)
744                 isc_heap_increased(heap, header->heap_index);
745         else
746                 isc_heap_decreased(heap, header->heap_index);
747 }
748
749 /*%
750  * These functions allow the heap code to rank the priority of each
751  * element.  It returns ISC_TRUE if v1 happens "sooner" than v2.
752  */
753 static isc_boolean_t
754 ttl_sooner(void *v1, void *v2) {
755         rdatasetheader_t *h1 = v1;
756         rdatasetheader_t *h2 = v2;
757
758         if (h1->rdh_ttl < h2->rdh_ttl)
759                 return (ISC_TRUE);
760         return (ISC_FALSE);
761 }
762
763 static isc_boolean_t
764 resign_sooner(void *v1, void *v2) {
765         rdatasetheader_t *h1 = v1;
766         rdatasetheader_t *h2 = v2;
767
768         if (h1->resign < h2->resign)
769                 return (ISC_TRUE);
770         return (ISC_FALSE);
771 }
772
773 /*%
774  * This function sets the heap index into the header.
775  */
776 static void
777 set_index(void *what, unsigned int index) {
778         rdatasetheader_t *h = what;
779
780         h->heap_index = index;
781 }
782
783 /*%
784  * Work out how many nodes can be deleted in the time between two
785  * requests to the nameserver.  Smooth the resulting number and use it
786  * as a estimate for the number of nodes to be deleted in the next
787  * iteration.
788  */
789 static unsigned int
790 adjust_quantum(unsigned int old, isc_time_t *start) {
791         unsigned int pps = dns_pps;     /* packets per second */
792         unsigned int interval;
793         isc_uint64_t usecs;
794         isc_time_t end;
795         unsigned int new;
796
797         if (pps < 100)
798                 pps = 100;
799         isc_time_now(&end);
800
801         interval = 1000000 / pps;       /* interval in usec */
802         if (interval == 0)
803                 interval = 1;
804         usecs = isc_time_microdiff(&end, start);
805         if (usecs == 0) {
806                 /*
807                  * We were unable to measure the amount of time taken.
808                  * Double the nodes deleted next time.
809                  */
810                 old *= 2;
811                 if (old > 1000)
812                         old = 1000;
813                 return (old);
814         }
815         new = old * interval;
816         new /= (unsigned int)usecs;
817         if (new == 0)
818                 new = 1;
819         else if (new > 1000)
820                 new = 1000;
821
822         /* Smooth */
823         new = (new + old * 3) / 4;
824
825         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
826                       ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
827
828         return (new);
829 }
830
831 static void
832 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
833         unsigned int i;
834         isc_ondestroy_t ondest;
835         isc_result_t result;
836         char buf[DNS_NAME_FORMATSIZE];
837         dns_rbt_t **treep;
838         isc_time_t start;
839
840         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
841                 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
842
843         REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
844         REQUIRE(rbtdb->future_version == NULL);
845
846         if (rbtdb->current_version != NULL) {
847                 unsigned int refs;
848
849                 isc_refcount_decrement(&rbtdb->current_version->references,
850                                        &refs);
851                 INSIST(refs == 0);
852                 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
853                 isc_refcount_destroy(&rbtdb->current_version->references);
854                 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
855                             sizeof(rbtdb_version_t));
856         }
857
858         /*
859          * We assume the number of remaining dead nodes is reasonably small;
860          * the overhead of unlinking all nodes here should be negligible.
861          */
862         for (i = 0; i < rbtdb->node_lock_count; i++) {
863                 dns_rbtnode_t *node;
864
865                 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
866                 while (node != NULL) {
867                         ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
868                         node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
869                 }
870         }
871
872         if (event == NULL)
873                 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
874
875         for (;;) {
876                 /*
877                  * pick the next tree to (start to) destroy
878                  */
879                 treep = &rbtdb->tree;
880                 if (*treep == NULL) {
881                         treep = &rbtdb->nsec;
882                         if (*treep == NULL) {
883                                 treep = &rbtdb->nsec3;
884                                 /*
885                                  * we're finished after clear cutting
886                                  */
887                                 if (*treep == NULL)
888                                         break;
889                         }
890                 }
891
892                 isc_time_now(&start);
893                 result = dns_rbt_destroy2(treep, rbtdb->quantum);
894                 if (result == ISC_R_QUOTA) {
895                         INSIST(rbtdb->task != NULL);
896                         if (rbtdb->quantum != 0)
897                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
898                                                                 &start);
899                         if (event == NULL)
900                                 event = isc_event_allocate(rbtdb->common.mctx,
901                                                            NULL,
902                                                          DNS_EVENT_FREESTORAGE,
903                                                            free_rbtdb_callback,
904                                                            rbtdb,
905                                                            sizeof(isc_event_t));
906                         if (event == NULL)
907                                 continue;
908                         isc_task_send(rbtdb->task, &event);
909                         return;
910                 }
911                 INSIST(result == ISC_R_SUCCESS && *treep == NULL);
912         }
913
914         if (event != NULL)
915                 isc_event_free(&event);
916         if (log) {
917                 if (dns_name_dynamic(&rbtdb->common.origin))
918                         dns_name_format(&rbtdb->common.origin, buf,
919                                         sizeof(buf));
920                 else
921                         strcpy(buf, "<UNKNOWN>");
922                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
923                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
924                               "done free_rbtdb(%s)", buf);
925         }
926         if (dns_name_dynamic(&rbtdb->common.origin))
927                 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
928         for (i = 0; i < rbtdb->node_lock_count; i++) {
929                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
930                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
931         }
932
933         /*
934          * Clean up LRU / re-signing order lists.
935          */
936         if (rbtdb->rdatasets != NULL) {
937                 for (i = 0; i < rbtdb->node_lock_count; i++)
938                         INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
939                 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
940                             rbtdb->node_lock_count *
941                             sizeof(rdatasetheaderlist_t));
942         }
943         /*
944          * Clean up dead node buckets.
945          */
946         if (rbtdb->deadnodes != NULL) {
947                 for (i = 0; i < rbtdb->node_lock_count; i++)
948                         INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
949                 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
950                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
951         }
952         /*
953          * Clean up heap objects.
954          */
955         if (rbtdb->heaps != NULL) {
956                 for (i = 0; i < rbtdb->node_lock_count; i++)
957                         isc_heap_destroy(&rbtdb->heaps[i]);
958                 isc_mem_put(rbtdb->common.mctx, rbtdb->heaps,
959                             rbtdb->node_lock_count *
960                             sizeof(isc_heap_t *));
961         }
962
963         if (rbtdb->rrsetstats != NULL)
964                 dns_stats_detach(&rbtdb->rrsetstats);
965
966 #ifdef BIND9
967         if (rbtdb->rpz_cidr != NULL)
968                 dns_rpz_cidr_free(&rbtdb->rpz_cidr);
969 #endif
970
971         isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
972                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
973         isc_rwlock_destroy(&rbtdb->tree_lock);
974         isc_refcount_destroy(&rbtdb->references);
975         if (rbtdb->task != NULL)
976                 isc_task_detach(&rbtdb->task);
977
978         RBTDB_DESTROYLOCK(&rbtdb->lock);
979         rbtdb->common.magic = 0;
980         rbtdb->common.impmagic = 0;
981         ondest = rbtdb->common.ondest;
982         isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
983         isc_ondestroy_notify(&ondest, rbtdb);
984 }
985
986 static inline void
987 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
988         isc_boolean_t want_free = ISC_FALSE;
989         unsigned int i;
990         unsigned int inactive = 0;
991
992         /* XXX check for open versions here */
993
994         if (rbtdb->soanode != NULL)
995                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
996         if (rbtdb->nsnode != NULL)
997                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
998
999         /*
1000          * Even though there are no external direct references, there still
1001          * may be nodes in use.
1002          */
1003         for (i = 0; i < rbtdb->node_lock_count; i++) {
1004                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1005                 rbtdb->node_locks[i].exiting = ISC_TRUE;
1006                 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1007                 if (isc_refcount_current(&rbtdb->node_locks[i].references)
1008                     == 0) {
1009                         inactive++;
1010                 }
1011         }
1012
1013         if (inactive != 0) {
1014                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1015                 rbtdb->active -= inactive;
1016                 if (rbtdb->active == 0)
1017                         want_free = ISC_TRUE;
1018                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1019                 if (want_free) {
1020                         char buf[DNS_NAME_FORMATSIZE];
1021                         if (dns_name_dynamic(&rbtdb->common.origin))
1022                                 dns_name_format(&rbtdb->common.origin, buf,
1023                                                 sizeof(buf));
1024                         else
1025                                 strcpy(buf, "<UNKNOWN>");
1026                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1027                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1028                                       "calling free_rbtdb(%s)", buf);
1029                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
1030                 }
1031         }
1032 }
1033
1034 static void
1035 detach(dns_db_t **dbp) {
1036         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1037         unsigned int refs;
1038
1039         REQUIRE(VALID_RBTDB(rbtdb));
1040
1041         isc_refcount_decrement(&rbtdb->references, &refs);
1042
1043         if (refs == 0)
1044                 maybe_free_rbtdb(rbtdb);
1045
1046         *dbp = NULL;
1047 }
1048
1049 static void
1050 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1051         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1052         rbtdb_version_t *version;
1053         unsigned int refs;
1054
1055         REQUIRE(VALID_RBTDB(rbtdb));
1056
1057         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1058         version = rbtdb->current_version;
1059         isc_refcount_increment(&version->references, &refs);
1060         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1061
1062         *versionp = (dns_dbversion_t *)version;
1063 }
1064
1065 static inline rbtdb_version_t *
1066 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1067                  unsigned int references, isc_boolean_t writer)
1068 {
1069         isc_result_t result;
1070         rbtdb_version_t *version;
1071
1072         version = isc_mem_get(mctx, sizeof(*version));
1073         if (version == NULL)
1074                 return (NULL);
1075         version->serial = serial;
1076         result = isc_refcount_init(&version->references, references);
1077         if (result != ISC_R_SUCCESS) {
1078                 isc_mem_put(mctx, version, sizeof(*version));
1079                 return (NULL);
1080         }
1081         version->writer = writer;
1082         version->commit_ok = ISC_FALSE;
1083         ISC_LIST_INIT(version->changed_list);
1084         ISC_LIST_INIT(version->resigned_list);
1085         ISC_LINK_INIT(version, link);
1086
1087         return (version);
1088 }
1089
1090 static isc_result_t
1091 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1092         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1093         rbtdb_version_t *version;
1094
1095         REQUIRE(VALID_RBTDB(rbtdb));
1096         REQUIRE(versionp != NULL && *versionp == NULL);
1097         REQUIRE(rbtdb->future_version == NULL);
1098
1099         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1100         RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
1101         version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1102                                    ISC_TRUE);
1103         if (version != NULL) {
1104                 version->commit_ok = ISC_TRUE;
1105                 version->secure = rbtdb->current_version->secure;
1106                 version->havensec3 = rbtdb->current_version->havensec3;
1107                 if (version->havensec3) {
1108                         version->flags = rbtdb->current_version->flags;
1109                         version->iterations =
1110                                 rbtdb->current_version->iterations;
1111                         version->hash = rbtdb->current_version->hash;
1112                         version->salt_length =
1113                                 rbtdb->current_version->salt_length;
1114                         memcpy(version->salt, rbtdb->current_version->salt,
1115                                version->salt_length);
1116                 } else {
1117                         version->flags = 0;
1118                         version->iterations = 0;
1119                         version->hash = 0;
1120                         version->salt_length = 0;
1121                         memset(version->salt, 0, sizeof(version->salt));
1122                 }
1123                 rbtdb->next_serial++;
1124                 rbtdb->future_version = version;
1125         }
1126         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1127
1128         if (version == NULL)
1129                 return (ISC_R_NOMEMORY);
1130
1131         *versionp = version;
1132
1133         return (ISC_R_SUCCESS);
1134 }
1135
1136 static void
1137 attachversion(dns_db_t *db, dns_dbversion_t *source,
1138               dns_dbversion_t **targetp)
1139 {
1140         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1141         rbtdb_version_t *rbtversion = source;
1142         unsigned int refs;
1143
1144         REQUIRE(VALID_RBTDB(rbtdb));
1145
1146         isc_refcount_increment(&rbtversion->references, &refs);
1147         INSIST(refs > 1);
1148
1149         *targetp = rbtversion;
1150 }
1151
1152 static rbtdb_changed_t *
1153 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1154             dns_rbtnode_t *node)
1155 {
1156         rbtdb_changed_t *changed;
1157         unsigned int refs;
1158
1159         /*
1160          * Caller must be holding the node lock if its reference must be
1161          * protected by the lock.
1162          */
1163
1164         changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1165
1166         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1167
1168         REQUIRE(version->writer);
1169
1170         if (changed != NULL) {
1171                 dns_rbtnode_refincrement(node, &refs);
1172                 INSIST(refs != 0);
1173                 changed->node = node;
1174                 changed->dirty = ISC_FALSE;
1175                 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1176         } else
1177                 version->commit_ok = ISC_FALSE;
1178
1179         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1180
1181         return (changed);
1182 }
1183
1184 static void
1185 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1186                  acachectl_t *array)
1187 {
1188         unsigned int count;
1189         unsigned int i;
1190         unsigned char *raw;     /* RDATASLAB */
1191
1192         /*
1193          * The caller must be holding the corresponding node lock.
1194          */
1195
1196         if (array == NULL)
1197                 return;
1198
1199         raw = (unsigned char *)header + sizeof(*header);
1200         count = raw[0] * 256 + raw[1];
1201
1202         /*
1203          * Sanity check: since an additional cache entry has a reference to
1204          * the original DB node (in the callback arg), there should be no
1205          * acache entries when the node can be freed.
1206          */
1207         for (i = 0; i < count; i++)
1208                 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1209
1210         isc_mem_put(mctx, array, count * sizeof(acachectl_t));
1211 }
1212
1213 static inline void
1214 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1215
1216         if (dns_name_dynamic(&(*noqname)->name))
1217                 dns_name_free(&(*noqname)->name, mctx);
1218         if ((*noqname)->neg != NULL)
1219                 isc_mem_put(mctx, (*noqname)->neg,
1220                             dns_rdataslab_size((*noqname)->neg, 0));
1221         if ((*noqname)->negsig != NULL)
1222                 isc_mem_put(mctx, (*noqname)->negsig,
1223                             dns_rdataslab_size((*noqname)->negsig, 0));
1224         isc_mem_put(mctx, *noqname, sizeof(**noqname));
1225         *noqname = NULL;
1226 }
1227
1228 static inline void
1229 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1230 {
1231         ISC_LINK_INIT(h, link);
1232         h->heap_index = 0;
1233
1234 #if TRACE_HEADER
1235         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1236                 fprintf(stderr, "initialized header: %p\n", h);
1237 #else
1238         UNUSED(rbtdb);
1239 #endif
1240 }
1241
1242 static inline rdatasetheader_t *
1243 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1244 {
1245         rdatasetheader_t *h;
1246
1247         h = isc_mem_get(mctx, sizeof(*h));
1248         if (h == NULL)
1249                 return (NULL);
1250
1251 #if TRACE_HEADER
1252         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1253                 fprintf(stderr, "allocated header: %p\n", h);
1254 #endif
1255         init_rdataset(rbtdb, h);
1256         return (h);
1257 }
1258
1259 static inline void
1260 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1261 {
1262         unsigned int size;
1263         int idx;
1264
1265         if (EXISTS(rdataset) &&
1266             (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1267                 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1268         }
1269
1270         idx = rdataset->node->locknum;
1271         if (ISC_LINK_LINKED(rdataset, link)) {
1272                 INSIST(IS_CACHE(rbtdb));
1273                 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1274         }
1275         if (rdataset->heap_index != 0)
1276                 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1277         rdataset->heap_index = 0;
1278
1279         if (rdataset->noqname != NULL)
1280                 free_noqname(mctx, &rdataset->noqname);
1281         if (rdataset->closest != NULL)
1282                 free_noqname(mctx, &rdataset->closest);
1283
1284         free_acachearray(mctx, rdataset, rdataset->additional_auth);
1285         free_acachearray(mctx, rdataset, rdataset->additional_glue);
1286
1287         if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1288                 size = sizeof(*rdataset);
1289         else
1290                 size = dns_rdataslab_size((unsigned char *)rdataset,
1291                                           sizeof(*rdataset));
1292         isc_mem_put(mctx, rdataset, size);
1293 }
1294
1295 static inline void
1296 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1297         rdatasetheader_t *header, *dcurrent;
1298         isc_boolean_t make_dirty = ISC_FALSE;
1299
1300         /*
1301          * Caller must hold the node lock.
1302          */
1303
1304         /*
1305          * We set the IGNORE attribute on rdatasets with serial number
1306          * 'serial'.  When the reference count goes to zero, these rdatasets
1307          * will be cleaned up; until that time, they will be ignored.
1308          */
1309         for (header = node->data; header != NULL; header = header->next) {
1310                 if (header->serial == serial) {
1311                         header->attributes |= RDATASET_ATTR_IGNORE;
1312                         make_dirty = ISC_TRUE;
1313                 }
1314                 for (dcurrent = header->down;
1315                      dcurrent != NULL;
1316                      dcurrent = dcurrent->down) {
1317                         if (dcurrent->serial == serial) {
1318                                 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1319                                 make_dirty = ISC_TRUE;
1320                         }
1321                 }
1322         }
1323         if (make_dirty)
1324                 node->dirty = 1;
1325 }
1326
1327 static inline void
1328 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1329 {
1330         rdatasetheader_t *d, *down_next;
1331
1332         for (d = top->down; d != NULL; d = down_next) {
1333                 down_next = d->down;
1334                 free_rdataset(rbtdb, mctx, d);
1335         }
1336         top->down = NULL;
1337 }
1338
1339 static inline void
1340 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1341         rdatasetheader_t *current, *top_prev, *top_next;
1342         isc_mem_t *mctx = rbtdb->common.mctx;
1343
1344         /*
1345          * Caller must be holding the node lock.
1346          */
1347
1348         top_prev = NULL;
1349         for (current = node->data; current != NULL; current = top_next) {
1350                 top_next = current->next;
1351                 clean_stale_headers(rbtdb, mctx, current);
1352                 /*
1353                  * If current is nonexistent or stale, we can clean it up.
1354                  */
1355                 if ((current->attributes &
1356                      (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1357                         if (top_prev != NULL)
1358                                 top_prev->next = current->next;
1359                         else
1360                                 node->data = current->next;
1361                         free_rdataset(rbtdb, mctx, current);
1362                 } else
1363                         top_prev = current;
1364         }
1365         node->dirty = 0;
1366 }
1367
1368 static inline void
1369 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1370                 rbtdb_serial_t least_serial)
1371 {
1372         rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1373         rdatasetheader_t *top_prev, *top_next;
1374         isc_mem_t *mctx = rbtdb->common.mctx;
1375         isc_boolean_t still_dirty = ISC_FALSE;
1376
1377         /*
1378          * Caller must be holding the node lock.
1379          */
1380         REQUIRE(least_serial != 0);
1381
1382         top_prev = NULL;
1383         for (current = node->data; current != NULL; current = top_next) {
1384                 top_next = current->next;
1385
1386                 /*
1387                  * First, we clean up any instances of multiple rdatasets
1388                  * with the same serial number, or that have the IGNORE
1389                  * attribute.
1390                  */
1391                 dparent = current;
1392                 for (dcurrent = current->down;
1393                      dcurrent != NULL;
1394                      dcurrent = down_next) {
1395                         down_next = dcurrent->down;
1396                         INSIST(dcurrent->serial <= dparent->serial);
1397                         if (dcurrent->serial == dparent->serial ||
1398                             IGNORE(dcurrent)) {
1399                                 if (down_next != NULL)
1400                                         down_next->next = dparent;
1401                                 dparent->down = down_next;
1402                                 free_rdataset(rbtdb, mctx, dcurrent);
1403                         } else
1404                                 dparent = dcurrent;
1405                 }
1406
1407                 /*
1408                  * We've now eliminated all IGNORE datasets with the possible
1409                  * exception of current, which we now check.
1410                  */
1411                 if (IGNORE(current)) {
1412                         down_next = current->down;
1413                         if (down_next == NULL) {
1414                                 if (top_prev != NULL)
1415                                         top_prev->next = current->next;
1416                                 else
1417                                         node->data = current->next;
1418                                 free_rdataset(rbtdb, mctx, current);
1419                                 /*
1420                                  * current no longer exists, so we can
1421                                  * just continue with the loop.
1422                                  */
1423                                 continue;
1424                         } else {
1425                                 /*
1426                                  * Pull up current->down, making it the new
1427                                  * current.
1428                                  */
1429                                 if (top_prev != NULL)
1430                                         top_prev->next = down_next;
1431                                 else
1432                                         node->data = down_next;
1433                                 down_next->next = top_next;
1434                                 free_rdataset(rbtdb, mctx, current);
1435                                 current = down_next;
1436                         }
1437                 }
1438
1439                 /*
1440                  * We now try to find the first down node less than the
1441                  * least serial.
1442                  */
1443                 dparent = current;
1444                 for (dcurrent = current->down;
1445                      dcurrent != NULL;
1446                      dcurrent = down_next) {
1447                         down_next = dcurrent->down;
1448                         if (dcurrent->serial < least_serial)
1449                                 break;
1450                         dparent = dcurrent;
1451                 }
1452
1453                 /*
1454                  * If there is a such an rdataset, delete it and any older
1455                  * versions.
1456                  */
1457                 if (dcurrent != NULL) {
1458                         do {
1459                                 down_next = dcurrent->down;
1460                                 INSIST(dcurrent->serial <= least_serial);
1461                                 free_rdataset(rbtdb, mctx, dcurrent);
1462                                 dcurrent = down_next;
1463                         } while (dcurrent != NULL);
1464                         dparent->down = NULL;
1465                 }
1466
1467                 /*
1468                  * Note.  The serial number of 'current' might be less than
1469                  * least_serial too, but we cannot delete it because it is
1470                  * the most recent version, unless it is a NONEXISTENT
1471                  * rdataset.
1472                  */
1473                 if (current->down != NULL) {
1474                         still_dirty = ISC_TRUE;
1475                         top_prev = current;
1476                 } else {
1477                         /*
1478                          * If this is a NONEXISTENT rdataset, we can delete it.
1479                          */
1480                         if (NONEXISTENT(current)) {
1481                                 if (top_prev != NULL)
1482                                         top_prev->next = current->next;
1483                                 else
1484                                         node->data = current->next;
1485                                 free_rdataset(rbtdb, mctx, current);
1486                         } else
1487                                 top_prev = current;
1488                 }
1489         }
1490         if (!still_dirty)
1491                 node->dirty = 0;
1492 }
1493
1494 static void
1495 delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node)
1496 {
1497         dns_rbtnode_t *nsecnode;
1498         dns_fixedname_t fname;
1499         dns_name_t *name;
1500         isc_result_t result = ISC_R_UNEXPECTED;
1501
1502         INSIST(!ISC_LINK_LINKED(node, deadlink));
1503
1504         switch (node->nsec) {
1505         case DNS_RBT_NSEC_NORMAL:
1506 #ifdef BIND9
1507                 if (rbtdb->rpz_cidr != NULL) {
1508                         dns_fixedname_init(&fname);
1509                         name = dns_fixedname_name(&fname);
1510                         dns_rbt_fullnamefromnode(node, name);
1511                         dns_rpz_cidr_deleteip(rbtdb->rpz_cidr, name);
1512                 }
1513 #endif
1514                 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1515                 break;
1516         case DNS_RBT_NSEC_HAS_NSEC:
1517                 dns_fixedname_init(&fname);
1518                 name = dns_fixedname_name(&fname);
1519                 dns_rbt_fullnamefromnode(node, name);
1520                 /*
1521                  * Delete the corresponding node from the auxiliary NSEC
1522                  * tree before deleting from the main tree.
1523                  */
1524                 nsecnode = NULL;
1525                 result = dns_rbt_findnode(rbtdb->nsec, name, NULL, &nsecnode,
1526                                           NULL, DNS_RBTFIND_EMPTYDATA,
1527                                           NULL, NULL);
1528                 if (result != ISC_R_SUCCESS) {
1529                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1530                                       DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1531                                       "delete_node: "
1532                                       "dns_rbt_findnode(nsec): %s",
1533                                       isc_result_totext(result));
1534                 } else {
1535                         result = dns_rbt_deletenode(rbtdb->nsec, nsecnode,
1536                                                     ISC_FALSE);
1537                         if (result != ISC_R_SUCCESS) {
1538                                 isc_log_write(dns_lctx,
1539                                               DNS_LOGCATEGORY_DATABASE,
1540                                               DNS_LOGMODULE_CACHE,
1541                                               ISC_LOG_WARNING,
1542                                               "delete_nsecnode(): "
1543                                               "dns_rbt_deletenode(nsecnode): %s",
1544                                               isc_result_totext(result));
1545                         }
1546                 }
1547                 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1548 #ifdef BIND9
1549                 dns_rpz_cidr_deleteip(rbtdb->rpz_cidr, name);
1550 #endif
1551                 break;
1552         case DNS_RBT_NSEC_NSEC:
1553                 result = dns_rbt_deletenode(rbtdb->nsec, node, ISC_FALSE);
1554                 break;
1555         case DNS_RBT_NSEC_NSEC3:
1556                 result = dns_rbt_deletenode(rbtdb->nsec3, node, ISC_FALSE);
1557                 break;
1558         }
1559         if (result != ISC_R_SUCCESS) {
1560                 isc_log_write(dns_lctx,
1561                               DNS_LOGCATEGORY_DATABASE,
1562                               DNS_LOGMODULE_CACHE,
1563                               ISC_LOG_WARNING,
1564                               "delete_nsecnode(): "
1565                               "dns_rbt_deletenode: %s",
1566                               isc_result_totext(result));
1567         }
1568 }
1569
1570 /*%
1571  * Clean up dead nodes.  These are nodes which have no references, and
1572  * have no data.  They are dead but we could not or chose not to delete
1573  * them when we deleted all the data at that node because we did not want
1574  * to wait for the tree write lock.
1575  *
1576  * The caller must hold a tree write lock and bucketnum'th node (write) lock.
1577  */
1578 static void
1579 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1580         dns_rbtnode_t *node;
1581         int count = 10;         /* XXXJT: should be adjustable */
1582
1583         node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1584         while (node != NULL && count > 0) {
1585                 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1586
1587                 /*
1588                  * Since we're holding a tree write lock, it should be
1589                  * impossible for this node to be referenced by others.
1590                  */
1591                 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1592                        node->data == NULL);
1593
1594                 delete_node(rbtdb, node);
1595
1596                 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1597                 count--;
1598         }
1599 }
1600
1601 /*
1602  * Caller must be holding the node lock if its reference must be protected
1603  * by the lock.
1604  */
1605 static inline void
1606 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1607         unsigned int lockrefs, noderefs;
1608         isc_refcount_t *lockref;
1609
1610         dns_rbtnode_refincrement0(node, &noderefs);
1611         if (noderefs == 1) {    /* this is the first reference to the node */
1612                 lockref = &rbtdb->node_locks[node->locknum].references;
1613                 isc_refcount_increment0(lockref, &lockrefs);
1614                 INSIST(lockrefs != 0);
1615         }
1616         INSIST(noderefs != 0);
1617 }
1618
1619 /*
1620  * This function is assumed to be called when a node is newly referenced
1621  * and can be in the deadnode list.  In that case the node must be retrieved
1622  * from the list because it is going to be used.  In addition, if the caller
1623  * happens to hold a write lock on the tree, it's a good chance to purge dead
1624  * nodes.
1625  * Note: while a new reference is gained in multiple places, there are only very
1626  * few cases where the node can be in the deadnode list (only empty nodes can
1627  * have been added to the list).
1628  */
1629 static inline void
1630 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1631                 isc_rwlocktype_t treelocktype)
1632 {
1633         isc_boolean_t need_relock = ISC_FALSE;
1634
1635         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1636         new_reference(rbtdb, node);
1637
1638         NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1639                       isc_rwlocktype_read);
1640         if (ISC_LINK_LINKED(node, deadlink))
1641                 need_relock = ISC_TRUE;
1642         else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1643                  treelocktype == isc_rwlocktype_write)
1644                 need_relock = ISC_TRUE;
1645         NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1646                         isc_rwlocktype_read);
1647         if (need_relock) {
1648                 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1649                               isc_rwlocktype_write);
1650                 if (ISC_LINK_LINKED(node, deadlink))
1651                         ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1652                                         node, deadlink);
1653                 if (treelocktype == isc_rwlocktype_write)
1654                         cleanup_dead_nodes(rbtdb, node->locknum);
1655                 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1656                                 isc_rwlocktype_write);
1657         }
1658
1659         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1660 }
1661
1662 /*
1663  * Caller must be holding the node lock; either the "strong", read or write
1664  * lock.  Note that the lock must be held even when node references are
1665  * atomically modified; in that case the decrement operation itself does not
1666  * have to be protected, but we must avoid a race condition where multiple
1667  * threads are decreasing the reference to zero simultaneously and at least
1668  * one of them is going to free the node.
1669  * This function returns ISC_TRUE if and only if the node reference decreases
1670  * to zero.
1671  */
1672 static isc_boolean_t
1673 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1674                     rbtdb_serial_t least_serial,
1675                     isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1676                     isc_boolean_t pruning)
1677 {
1678         isc_result_t result;
1679         isc_boolean_t write_locked;
1680         rbtdb_nodelock_t *nodelock;
1681         unsigned int refs, nrefs;
1682         int bucket = node->locknum;
1683         isc_boolean_t no_reference;
1684
1685         nodelock = &rbtdb->node_locks[bucket];
1686
1687         /* Handle easy and typical case first. */
1688         if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1689                 dns_rbtnode_refdecrement(node, &nrefs);
1690                 INSIST((int)nrefs >= 0);
1691                 if (nrefs == 0) {
1692                         isc_refcount_decrement(&nodelock->references, &refs);
1693                         INSIST((int)refs >= 0);
1694                 }
1695                 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1696         }
1697
1698         /* Upgrade the lock? */
1699         if (nlock == isc_rwlocktype_read) {
1700                 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1701                 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1702         }
1703         dns_rbtnode_refdecrement(node, &nrefs);
1704         INSIST((int)nrefs >= 0);
1705         if (nrefs > 0) {
1706                 /* Restore the lock? */
1707                 if (nlock == isc_rwlocktype_read)
1708                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1709                 return (ISC_FALSE);
1710         }
1711
1712         if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1713                 if (IS_CACHE(rbtdb))
1714                         clean_cache_node(rbtdb, node);
1715                 else {
1716                         if (least_serial == 0) {
1717                                 /*
1718                                  * Caller doesn't know the least serial.
1719                                  * Get it.
1720                                  */
1721                                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1722                                 least_serial = rbtdb->least_serial;
1723                                 RBTDB_UNLOCK(&rbtdb->lock,
1724                                              isc_rwlocktype_read);
1725                         }
1726                         clean_zone_node(rbtdb, node, least_serial);
1727                 }
1728         }
1729
1730         isc_refcount_decrement(&nodelock->references, &refs);
1731         INSIST((int)refs >= 0);
1732
1733         /*
1734          * XXXDCL should this only be done for cache zones?
1735          */
1736         if (node->data != NULL || node->down != NULL) {
1737                 /* Restore the lock? */
1738                 if (nlock == isc_rwlocktype_read)
1739                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1740                 return (ISC_TRUE);
1741         }
1742
1743         /*
1744          * Attempt to switch to a write lock on the tree.  If this fails,
1745          * we will add this node to a linked list of nodes in this locking
1746          * bucket which we will free later.
1747          */
1748         if (tlock != isc_rwlocktype_write) {
1749                 /*
1750                  * Locking hierarchy notwithstanding, we don't need to free
1751                  * the node lock before acquiring the tree write lock because
1752                  * we only do a trylock.
1753                  */
1754                 if (tlock == isc_rwlocktype_read)
1755                         result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1756                 else
1757                         result = isc_rwlock_trylock(&rbtdb->tree_lock,
1758                                                     isc_rwlocktype_write);
1759                 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1760                               result == ISC_R_LOCKBUSY);
1761
1762                 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1763         } else
1764                 write_locked = ISC_TRUE;
1765
1766         no_reference = ISC_TRUE;
1767         if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1768                 /*
1769                  * We can now delete the node if the reference counter is
1770                  * zero.  This should be typically the case, but a different
1771                  * thread may still gain a (new) reference just before the
1772                  * current thread locks the tree (e.g., in findnode()).
1773                  */
1774
1775                 /*
1776                  * If this node is the only one in the level it's in, deleting
1777                  * this node may recursively make its parent the only node in
1778                  * the parent level; if so, and if no one is currently using
1779                  * the parent node, this is almost the only opportunity to
1780                  * clean it up.  But the recursive cleanup is not that trivial
1781                  * since the child and parent may be in different lock buckets,
1782                  * which would cause a lock order reversal problem.  To avoid
1783                  * the trouble, we'll dispatch a separate event for batch
1784                  * cleaning.  We need to check whether we're deleting the node
1785                  * as a result of pruning to avoid infinite dispatching.
1786                  * Note: pruning happens only when a task has been set for the
1787                  * rbtdb.  If the user of the rbtdb chooses not to set a task,
1788                  * it's their responsibility to purge stale leaves (e.g. by
1789                  * periodic walk-through).
1790                  */
1791                 if (!pruning && node->parent != NULL &&
1792                     node->parent->down == node && node->left == NULL &&
1793                     node->right == NULL && rbtdb->task != NULL) {
1794                         isc_event_t *ev;
1795                         dns_db_t *db;
1796
1797                         ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1798                                                 DNS_EVENT_RBTPRUNE,
1799                                                 prune_tree, node,
1800                                                 sizeof(isc_event_t));
1801                         if (ev != NULL) {
1802                                 new_reference(rbtdb, node);
1803                                 db = NULL;
1804                                 attach((dns_db_t *)rbtdb, &db);
1805                                 ev->ev_sender = db;
1806                                 isc_task_send(rbtdb->task, &ev);
1807                                 no_reference = ISC_FALSE;
1808                         } else {
1809                                 /*
1810                                  * XXX: this is a weird situation.  We could
1811                                  * ignore this error case, but then the stale
1812                                  * node will unlikely be purged except via a
1813                                  * rare condition such as manual cleanup.  So
1814                                  * we queue it in the deadnodes list, hoping
1815                                  * the memory shortage is temporary and the node
1816                                  * will be deleted later.
1817                                  */
1818                                 isc_log_write(dns_lctx,
1819                                               DNS_LOGCATEGORY_DATABASE,
1820                                               DNS_LOGMODULE_CACHE,
1821                                               ISC_LOG_INFO,
1822                                               "decrement_reference: failed to "
1823                                               "allocate pruning event");
1824                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1825                                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1826                                                 deadlink);
1827                         }
1828                 } else {
1829                         if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1830                                 char printname[DNS_NAME_FORMATSIZE];
1831
1832                                 isc_log_write(dns_lctx,
1833                                               DNS_LOGCATEGORY_DATABASE,
1834                                               DNS_LOGMODULE_CACHE,
1835                                               ISC_LOG_DEBUG(1),
1836                                               "decrement_reference: "
1837                                               "delete from rbt: %p %s",
1838                                               node,
1839                                               dns_rbt_formatnodename(node,
1840                                                         printname,
1841                                                         sizeof(printname)));
1842                         }
1843
1844                         delete_node(rbtdb, node);
1845                 }
1846         } else if (dns_rbtnode_refcurrent(node) == 0) {
1847                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1848                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1849         } else
1850                 no_reference = ISC_FALSE;
1851
1852         /* Restore the lock? */
1853         if (nlock == isc_rwlocktype_read)
1854                 NODE_WEAKDOWNGRADE(&nodelock->lock);
1855
1856         /*
1857          * Relock a read lock, or unlock the write lock if no lock was held.
1858          */
1859         if (tlock == isc_rwlocktype_none)
1860                 if (write_locked)
1861                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1862
1863         if (tlock == isc_rwlocktype_read)
1864                 if (write_locked)
1865                         isc_rwlock_downgrade(&rbtdb->tree_lock);
1866
1867         return (no_reference);
1868 }
1869
1870 /*
1871  * Prune the tree by recursively cleaning-up single leaves.  In the worst
1872  * case, the number of iteration is the number of tree levels, which is at
1873  * most the maximum number of domain name labels, i.e, 127.  In practice, this
1874  * should be much smaller (only a few times), and even the worst case would be
1875  * acceptable for a single event.
1876  */
1877 static void
1878 prune_tree(isc_task_t *task, isc_event_t *event) {
1879         dns_rbtdb_t *rbtdb = event->ev_sender;
1880         dns_rbtnode_t *node = event->ev_arg;
1881         dns_rbtnode_t *parent;
1882         unsigned int locknum;
1883
1884         UNUSED(task);
1885
1886         isc_event_free(&event);
1887
1888         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1889         locknum = node->locknum;
1890         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1891         do {
1892                 parent = node->parent;
1893                 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1894                                     isc_rwlocktype_write, ISC_TRUE);
1895
1896                 if (parent != NULL && parent->down == NULL) {
1897                         /*
1898                          * node was the only down child of the parent and has
1899                          * just been removed.  We'll then need to examine the
1900                          * parent.  Keep the lock if possible; otherwise,
1901                          * release the old lock and acquire one for the parent.
1902                          */
1903                         if (parent->locknum != locknum) {
1904                                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1905                                             isc_rwlocktype_write);
1906                                 locknum = parent->locknum;
1907                                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1908                                           isc_rwlocktype_write);
1909                         }
1910
1911                         /*
1912                          * We need to gain a reference to the node before
1913                          * decrementing it in the next iteration.  In addition,
1914                          * if the node is in the dead-nodes list, extract it
1915                          * from the list beforehand as we do in
1916                          * reactivate_node().
1917                          */
1918                         new_reference(rbtdb, parent);
1919                         if (ISC_LINK_LINKED(parent, deadlink)) {
1920                                 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1921                                                 parent, deadlink);
1922                         }
1923                 } else
1924                         parent = NULL;
1925
1926                 node = parent;
1927         } while (node != NULL);
1928         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1929         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1930
1931         detach((dns_db_t **)&rbtdb);
1932 }
1933
1934 static inline void
1935 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1936                    rbtdb_changedlist_t *cleanup_list)
1937 {
1938         /*
1939          * Caller must be holding the database lock.
1940          */
1941
1942         rbtdb->least_serial = version->serial;
1943         *cleanup_list = version->changed_list;
1944         ISC_LIST_INIT(version->changed_list);
1945 }
1946
1947 static inline void
1948 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1949         rbtdb_changed_t *changed, *next_changed;
1950
1951         /*
1952          * If the changed record is dirty, then
1953          * an update created multiple versions of
1954          * a given rdataset.  We keep this list
1955          * until we're the least open version, at
1956          * which point it's safe to get rid of any
1957          * older versions.
1958          *
1959          * If the changed record isn't dirty, then
1960          * we don't need it anymore since we're
1961          * committing and not rolling back.
1962          *
1963          * The caller must be holding the database lock.
1964          */
1965         for (changed = HEAD(version->changed_list);
1966              changed != NULL;
1967              changed = next_changed) {
1968                 next_changed = NEXT(changed, link);
1969                 if (!changed->dirty) {
1970                         UNLINK(version->changed_list,
1971                                changed, link);
1972                         APPEND(*cleanup_list,
1973                                changed, link);
1974                 }
1975         }
1976 }
1977
1978 static void
1979 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1980 #ifndef BIND9
1981         UNUSED(db);
1982         UNUSED(version);
1983         UNUSED(origin);
1984
1985         return;
1986 #else
1987         dns_rdataset_t keyset;
1988         dns_rdataset_t nsecset, signsecset;
1989         isc_boolean_t haszonekey = ISC_FALSE;
1990         isc_boolean_t hasnsec = ISC_FALSE;
1991         isc_result_t result;
1992
1993         dns_rdataset_init(&keyset);
1994         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1995                                      0, 0, &keyset, NULL);
1996         if (result == ISC_R_SUCCESS) {
1997                 dns_rdata_t keyrdata = DNS_RDATA_INIT;
1998                 result = dns_rdataset_first(&keyset);
1999                 while (result == ISC_R_SUCCESS) {
2000                         dns_rdataset_current(&keyset, &keyrdata);
2001                         if (dns_zonekey_iszonekey(&keyrdata)) {
2002                                 haszonekey = ISC_TRUE;
2003                                 break;
2004                         }
2005                         result = dns_rdataset_next(&keyset);
2006                 }
2007                 dns_rdataset_disassociate(&keyset);
2008         }
2009         if (!haszonekey) {
2010                 version->secure = dns_db_insecure;
2011                 version->havensec3 = ISC_FALSE;
2012                 return;
2013         }
2014
2015         dns_rdataset_init(&nsecset);
2016         dns_rdataset_init(&signsecset);
2017         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
2018                                      0, 0, &nsecset, &signsecset);
2019         if (result == ISC_R_SUCCESS) {
2020                 if (dns_rdataset_isassociated(&signsecset)) {
2021                         hasnsec = ISC_TRUE;
2022                         dns_rdataset_disassociate(&signsecset);
2023                 }
2024                 dns_rdataset_disassociate(&nsecset);
2025         }
2026
2027         setnsec3parameters(db, version);
2028
2029         /*
2030          * Do we have a valid NSEC/NSEC3 chain?
2031          */
2032         if (version->havensec3 || hasnsec)
2033                 version->secure = dns_db_secure;
2034         else
2035                 version->secure = dns_db_insecure;
2036 #endif
2037 }
2038
/*%
 * Walk the origin node looking for NSEC3PARAM records.
 * Cache the nsec3 parameters.
 *
 * version->havensec3 is cleared first and set again only when a usable
 * NSEC3PARAM record (supported or "unknown test" hash, flags == 0) is found;
 * in that case the hash, salt, iterations and flags are stored in 'version'.
 * The tree lock and the origin node's lock are held for reading while the
 * node is examined.
 */
#ifdef BIND9
static void
setnsec3parameters(dns_db_t *db, rbtdb_version_t *version) {
        dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
        dns_rbtnode_t *node;
        rdatasetheader_t *top, *header;
        dns_rdata_nsec3param_t nsec3param;
        dns_rdata_t rdata = DNS_RDATA_INIT;
        isc_region_t region;
        isc_result_t result;
        unsigned char *raw;             /* RDATASLAB */
        unsigned int count, length;

        RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
        version->havensec3 = ISC_FALSE;
        node = rbtdb->origin_node;
        NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
                  isc_rwlocktype_read);

        for (top = node->data; top != NULL; top = top->next) {
                /*
                 * Descend the 'down' chain to the first header that is
                 * visible in this version; a header marked nonexistent
                 * means there is nothing to look at for this type.
                 */
                for (header = top; header != NULL; header = header->down) {
                        if (header->serial <= version->serial &&
                            !IGNORE(header)) {
                                if (NONEXISTENT(header))
                                        header = NULL;
                                break;
                        }
                }

                if (header == NULL ||
                    header->type != dns_rdatatype_nsec3param)
                        continue;

                /*
                 * Find A NSEC3PARAM with a supported algorithm.
                 * The record data follows the header; it starts with a
                 * two-octet record count.
                 */
                raw = (unsigned char *)header + sizeof(*header);
                count = raw[0] * 256 + raw[1];
#if DNS_RDATASET_FIXED
                raw += count * 4 + 2;
#else
                raw += 2;
#endif
                for (; count > 0U; count--) {
                        /* Each record is preceded by its two-octet length. */
                        length = raw[0] * 256 + raw[1];
#if DNS_RDATASET_FIXED
                        raw += 4;
#else
                        raw += 2;
#endif
                        region.base = raw;
                        region.length = length;
                        raw += length;
                        dns_rdata_fromregion(&rdata,
                                             rbtdb->common.rdclass,
                                             dns_rdatatype_nsec3param,
                                             &region);
                        result = dns_rdata_tostruct(&rdata,
                                                    &nsec3param,
                                                    NULL);
                        INSIST(result == ISC_R_SUCCESS);
                        dns_rdata_reset(&rdata);

                        /* Skip unsupported hashes and records with flags. */
                        if ((nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
                             !dns_nsec3_supportedhash(nsec3param.hash)) ||
                            nsec3param.flags != 0)
                                continue;

                        version->hash = nsec3param.hash;
                        version->iterations = nsec3param.iterations;
                        version->flags = nsec3param.flags;
                        version->salt_length = nsec3param.salt_length;
                        memcpy(version->salt, nsec3param.salt,
                               nsec3param.salt_length);
                        version->havensec3 = ISC_TRUE;

                        /*
                         * Look for a better algorithm than the
                         * unknown test algorithm.
                         */
                        if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
                                goto unlock;
                }
        }

 unlock:
        NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
                    isc_rwlocktype_read);
        RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
}
#endif
2136
2137 static void
2138 cleanup_dead_nodes_callback(isc_task_t *task, isc_event_t *event) {
2139         dns_rbtdb_t *rbtdb = event->ev_arg;
2140         isc_boolean_t again = ISC_FALSE;
2141         unsigned int locknum;
2142         unsigned int refs;
2143
2144         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2145         for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) {
2146                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
2147                           isc_rwlocktype_write);
2148                 cleanup_dead_nodes(rbtdb, locknum);
2149                 if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL)
2150                         again = ISC_TRUE;
2151                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
2152                             isc_rwlocktype_write);
2153         }
2154         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2155         if (again)
2156                 isc_task_send(task, &event);
2157         else {
2158                 isc_event_free(&event);
2159                 isc_refcount_decrement(&rbtdb->references, &refs);
2160                 if (refs == 0)
2161                         maybe_free_rbtdb(rbtdb);
2162         }
2163 }
2164
2165 static void
2166 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2167         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2168         rbtdb_version_t *version, *cleanup_version, *least_greater;
2169         isc_boolean_t rollback = ISC_FALSE;
2170         rbtdb_changedlist_t cleanup_list;
2171         rdatasetheaderlist_t resigned_list;
2172         rbtdb_changed_t *changed, *next_changed;
2173         rbtdb_serial_t serial, least_serial;
2174         dns_rbtnode_t *rbtnode;
2175         unsigned int refs;
2176         rdatasetheader_t *header;
2177         isc_boolean_t writer;
2178
2179         REQUIRE(VALID_RBTDB(rbtdb));
2180         version = (rbtdb_version_t *)*versionp;
2181
2182         cleanup_version = NULL;
2183         ISC_LIST_INIT(cleanup_list);
2184         ISC_LIST_INIT(resigned_list);
2185
2186         isc_refcount_decrement(&version->references, &refs);
2187         if (refs > 0) {         /* typical and easy case first */
2188                 if (commit) {
2189                         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2190                         INSIST(!version->writer);
2191                         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2192                 }
2193                 goto end;
2194         }
2195
2196         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2197         serial = version->serial;
2198         writer = version->writer;
2199         if (version->writer) {
2200                 if (commit) {
2201                         unsigned cur_ref;
2202                         rbtdb_version_t *cur_version;
2203
2204                         INSIST(version->commit_ok);
2205                         INSIST(version == rbtdb->future_version);
2206                         /*
2207                          * The current version is going to be replaced.
2208                          * Release the (likely last) reference to it from the
2209                          * DB itself and unlink it from the open list.
2210                          */
2211                         cur_version = rbtdb->current_version;
2212                         isc_refcount_decrement(&cur_version->references,
2213                                                &cur_ref);
2214                         if (cur_ref == 0) {
2215                                 if (cur_version->serial == rbtdb->least_serial)
2216                                         INSIST(EMPTY(cur_version->changed_list));
2217                                 UNLINK(rbtdb->open_versions,
2218                                        cur_version, link);
2219                         }
2220                         if (EMPTY(rbtdb->open_versions)) {
2221                                 /*
2222                                  * We're going to become the least open
2223                                  * version.
2224                                  */
2225                                 make_least_version(rbtdb, version,
2226                                                    &cleanup_list);
2227                         } else {
2228                                 /*
2229                                  * Some other open version is the
2230                                  * least version.  We can't cleanup
2231                                  * records that were changed in this
2232                                  * version because the older versions
2233                                  * may still be in use by an open
2234                                  * version.
2235                                  *
2236                                  * We can, however, discard the
2237                                  * changed records for things that
2238                                  * we've added that didn't exist in
2239                                  * prior versions.
2240                                  */
2241                                 cleanup_nondirty(version, &cleanup_list);
2242                         }
2243                         /*
2244                          * If the (soon to be former) current version
2245                          * isn't being used by anyone, we can clean
2246                          * it up.
2247                          */
2248                         if (cur_ref == 0) {
2249                                 cleanup_version = cur_version;
2250                                 APPENDLIST(version->changed_list,
2251                                            cleanup_version->changed_list,
2252                                            link);
2253                         }
2254                         /*
2255                          * Become the current version.
2256                          */
2257                         version->writer = ISC_FALSE;
2258                         rbtdb->current_version = version;
2259                         rbtdb->current_serial = version->serial;
2260                         rbtdb->future_version = NULL;
2261
2262                         /*
2263                          * Keep the current version in the open list, and
2264                          * gain a reference for the DB itself (see the DB
2265                          * creation function below).  This must be the only
2266                          * case where we need to increment the counter from
2267                          * zero and need to use isc_refcount_increment0().
2268                          */
2269                         isc_refcount_increment0(&version->references,
2270                                                 &cur_ref);
2271                         INSIST(cur_ref == 1);
2272                         PREPEND(rbtdb->open_versions,
2273                                 rbtdb->current_version, link);
2274                         resigned_list = version->resigned_list;
2275                         ISC_LIST_INIT(version->resigned_list);
2276                 } else {
2277                         /*
2278                          * We're rolling back this transaction.
2279                          */
2280                         cleanup_list = version->changed_list;
2281                         ISC_LIST_INIT(version->changed_list);
2282                         resigned_list = version->resigned_list;
2283                         ISC_LIST_INIT(version->resigned_list);
2284                         rollback = ISC_TRUE;
2285                         cleanup_version = version;
2286                         rbtdb->future_version = NULL;
2287                 }
2288         } else {
2289                 if (version != rbtdb->current_version) {
2290                         /*
2291                          * There are no external or internal references
2292                          * to this version and it can be cleaned up.
2293                          */
2294                         cleanup_version = version;
2295
2296                         /*
2297                          * Find the version with the least serial
2298                          * number greater than ours.
2299                          */
2300                         least_greater = PREV(version, link);
2301                         if (least_greater == NULL)
2302                                 least_greater = rbtdb->current_version;
2303
2304                         INSIST(version->serial < least_greater->serial);
2305                         /*
2306                          * Is this the least open version?
2307                          */
2308                         if (version->serial == rbtdb->least_serial) {
2309                                 /*
2310                                  * Yes.  Install the new least open
2311                                  * version.
2312                                  */
2313                                 make_least_version(rbtdb,
2314                                                    least_greater,
2315                                                    &cleanup_list);
2316                         } else {
2317                                 /*
2318                                  * Add any unexecuted cleanups to
2319                                  * those of the least greater version.
2320                                  */
2321                                 APPENDLIST(least_greater->changed_list,
2322                                            version->changed_list,
2323                                            link);
2324                         }
2325                 } else if (version->serial == rbtdb->least_serial)
2326                         INSIST(EMPTY(version->changed_list));
2327                 UNLINK(rbtdb->open_versions, version, link);
2328         }
2329         least_serial = rbtdb->least_serial;
2330         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2331
2332         /*
2333          * Update the zone's secure status.
2334          */
2335         if (writer && commit && !IS_CACHE(rbtdb))
2336                 iszonesecure(db, version, rbtdb->origin_node);
2337
2338         if (cleanup_version != NULL) {
2339                 INSIST(EMPTY(cleanup_version->changed_list));
2340                 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2341                             sizeof(*cleanup_version));
2342         }
2343
2344         /*
2345          * Commit/rollback re-signed headers.
2346          */
2347         for (header = HEAD(resigned_list);
2348              header != NULL;
2349              header = HEAD(resigned_list)) {
2350                 nodelock_t *lock;
2351
2352                 ISC_LIST_UNLINK(resigned_list, header, link);
2353
2354                 lock = &rbtdb->node_locks[header->node->locknum].lock;
2355                 NODE_LOCK(lock, isc_rwlocktype_write);
2356                 if (rollback)
2357                         resign_insert(rbtdb, header->node->locknum, header);
2358                 decrement_reference(rbtdb, header->node, least_serial,
2359                                     isc_rwlocktype_write, isc_rwlocktype_none,
2360                                     ISC_FALSE);
2361                 NODE_UNLOCK(lock, isc_rwlocktype_write);
2362         }
2363
2364         if (!EMPTY(cleanup_list)) {
2365                 isc_event_t *event = NULL;
2366                 isc_rwlocktype_t tlock = isc_rwlocktype_none;
2367
2368                 if (rbtdb->task != NULL)
2369                         event = isc_event_allocate(rbtdb->common.mctx, NULL,
2370                                                    DNS_EVENT_RBTDEADNODES,
2371                                                    cleanup_dead_nodes_callback,
2372                                                    rbtdb, sizeof(isc_event_t));
2373                 if (event == NULL) {
2374                         /*
2375                          * We acquire a tree write lock here in order to make
2376                          * sure that stale nodes will be removed in
2377                          * decrement_reference().  If we didn't have the lock,
2378                          * those nodes could miss the chance to be removed
2379                          * until the server stops.  The write lock is
2380                          * expensive, but this event should be rare enough
2381                          * to justify the cost.
2382                          */
2383                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2384                         tlock = isc_rwlocktype_write;
2385                 }
2386
2387                 for (changed = HEAD(cleanup_list);
2388                      changed != NULL;
2389                      changed = next_changed) {
2390                         nodelock_t *lock;
2391
2392                         next_changed = NEXT(changed, link);
2393                         rbtnode = changed->node;
2394                         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2395
2396                         NODE_LOCK(lock, isc_rwlocktype_write);
2397                         /*
2398                          * This is a good opportunity to purge any dead nodes,
2399                          * so use it.
2400                          */
2401                         if (event == NULL)
2402                                 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2403
2404                         if (rollback)
2405                                 rollback_node(rbtnode, serial);
2406                         decrement_reference(rbtdb, rbtnode, least_serial,
2407                                             isc_rwlocktype_write, tlock,
2408                                             ISC_FALSE);
2409
2410                         NODE_UNLOCK(lock, isc_rwlocktype_write);
2411
2412                         isc_mem_put(rbtdb->common.mctx, changed,
2413                                     sizeof(*changed));
2414                 }
2415                 if (event != NULL) {
2416                         isc_refcount_increment(&rbtdb->references, NULL);
2417                         isc_task_send(rbtdb->task, &event);
2418                 } else
2419                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2420         }
2421
2422  end:
2423         *versionp = NULL;
2424 }
2425
2426 /*
2427  * Add the necessary magic for the wildcard name 'name'
2428  * to be found in 'rbtdb'.
2429  *
2430  * In order for wildcard matching to work correctly in
2431  * zone_find(), we must ensure that a node for the wildcarding
2432  * level exists in the database, and has its 'find_callback'
2433  * and 'wild' bits set.
2434  *
2435  * E.g. if the wildcard name is "*.sub.example." then we
2436  * must ensure that "sub.example." exists and is marked as
2437  * a wildcard level.
2438  */
2439 static isc_result_t
2440 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2441         isc_result_t result;
2442         dns_name_t foundname;
2443         dns_offsets_t offsets;
2444         unsigned int n;
2445         dns_rbtnode_t *node = NULL;
2446
2447         dns_name_init(&foundname, offsets);
2448         n = dns_name_countlabels(name);
2449         INSIST(n >= 2);
2450         n--;
2451         dns_name_getlabelsequence(name, 1, n, &foundname);
2452         result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2453         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2454                 return (result);
2455         if (result == ISC_R_SUCCESS)
2456                 node->nsec = DNS_RBT_NSEC_NORMAL;
2457         node->find_callback = 1;
2458         node->wild = 1;
2459         return (ISC_R_SUCCESS);
2460 }
2461
2462 static isc_result_t
2463 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2464         isc_result_t result;
2465         dns_name_t foundname;
2466         dns_offsets_t offsets;
2467         unsigned int n, l, i;
2468
2469         dns_name_init(&foundname, offsets);
2470         n = dns_name_countlabels(name);
2471         l = dns_name_countlabels(&rbtdb->common.origin);
2472         i = l + 1;
2473         while (i < n) {
2474                 dns_rbtnode_t *node = NULL;     /* dummy */
2475                 dns_name_getlabelsequence(name, n - i, i, &foundname);
2476                 if (dns_name_iswildcard(&foundname)) {
2477                         result = add_wildcard_magic(rbtdb, &foundname);
2478                         if (result != ISC_R_SUCCESS)
2479                                 return (result);
2480                         result = dns_rbt_addnode(rbtdb->tree, &foundname,
2481                                                  &node);
2482                         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2483                                 return (result);
2484                         if (result == ISC_R_SUCCESS)
2485                                 node->nsec = DNS_RBT_NSEC_NORMAL;
2486                 }
2487                 i++;
2488         }
2489         return (ISC_R_SUCCESS);
2490 }
2491
2492 static isc_result_t
2493 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2494          dns_dbnode_t **nodep)
2495 {
2496         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2497         dns_rbtnode_t *node = NULL;
2498         dns_name_t nodename;
2499         isc_result_t result;
2500         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2501
2502         REQUIRE(VALID_RBTDB(rbtdb));
2503
2504         dns_name_init(&nodename, NULL);
2505         RWLOCK(&rbtdb->tree_lock, locktype);
2506         result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2507                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2508         if (result != ISC_R_SUCCESS) {
2509                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2510                 if (!create) {
2511                         if (result == DNS_R_PARTIALMATCH)
2512                                 result = ISC_R_NOTFOUND;
2513                         return (result);
2514                 }
2515                 /*
2516                  * It would be nice to try to upgrade the lock instead of
2517                  * unlocking then relocking.
2518                  */
2519                 locktype = isc_rwlocktype_write;
2520                 RWLOCK(&rbtdb->tree_lock, locktype);
2521                 node = NULL;
2522                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2523                 if (result == ISC_R_SUCCESS) {
2524 #ifdef BIND9
2525                         if (rbtdb->rpz_cidr != NULL) {
2526                                 dns_fixedname_t fnamef;
2527                                 dns_name_t *fname;
2528
2529                                 dns_fixedname_init(&fnamef);
2530                                 fname = dns_fixedname_name(&fnamef);
2531                                 dns_rbt_fullnamefromnode(node, fname);
2532                                 dns_rpz_cidr_addip(rbtdb->rpz_cidr, fname);
2533                         }
2534 #endif
2535                         dns_rbt_namefromnode(node, &nodename);
2536 #ifdef DNS_RBT_USEHASH
2537                         node->locknum = node->hashval % rbtdb->node_lock_count;
2538 #else
2539                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2540                                 rbtdb->node_lock_count;
2541 #endif
2542                         add_empty_wildcards(rbtdb, name);
2543
2544                         if (dns_name_iswildcard(name)) {
2545                                 result = add_wildcard_magic(rbtdb, name);
2546                                 if (result != ISC_R_SUCCESS) {
2547                                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2548                                         return (result);
2549                                 }
2550                         }
2551                 } else if (result != ISC_R_EXISTS) {
2552                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2553                         return (result);
2554                 }
2555         }
2556         reactivate_node(rbtdb, node, locktype);
2557         RWUNLOCK(&rbtdb->tree_lock, locktype);
2558
2559         *nodep = (dns_dbnode_t *)node;
2560
2561         return (ISC_R_SUCCESS);
2562 }
2563
2564 static isc_result_t
2565 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2566               dns_dbnode_t **nodep)
2567 {
2568         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2569         dns_rbtnode_t *node = NULL;
2570         dns_name_t nodename;
2571         isc_result_t result;
2572         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2573
2574         REQUIRE(VALID_RBTDB(rbtdb));
2575
2576         dns_name_init(&nodename, NULL);
2577         RWLOCK(&rbtdb->tree_lock, locktype);
2578         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, &node, NULL,
2579                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2580         if (result != ISC_R_SUCCESS) {
2581                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2582                 if (!create) {
2583                         if (result == DNS_R_PARTIALMATCH)
2584                                 result = ISC_R_NOTFOUND;
2585                         return (result);
2586                 }
2587                 /*
2588                  * It would be nice to try to upgrade the lock instead of
2589                  * unlocking then relocking.
2590                  */
2591                 locktype = isc_rwlocktype_write;
2592                 RWLOCK(&rbtdb->tree_lock, locktype);
2593                 node = NULL;
2594                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
2595                 if (result == ISC_R_SUCCESS) {
2596                         dns_rbt_namefromnode(node, &nodename);
2597 #ifdef DNS_RBT_USEHASH
2598                         node->locknum = node->hashval % rbtdb->node_lock_count;
2599 #else
2600                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2601                                 rbtdb->node_lock_count;
2602 #endif
2603                         node->nsec = DNS_RBT_NSEC_NSEC3;
2604                 } else if (result != ISC_R_EXISTS) {
2605                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2606                         return (result);
2607                 }
2608         } else {
2609                 INSIST(node->nsec == DNS_RBT_NSEC_NSEC3);
2610         }
2611         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
2612         new_reference(rbtdb, node);
2613         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
2614         RWUNLOCK(&rbtdb->tree_lock, locktype);
2615
2616         *nodep = (dns_dbnode_t *)node;
2617
2618         return (ISC_R_SUCCESS);
2619 }
2620
2621 static isc_result_t
2622 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2623         rbtdb_search_t *search = arg;
2624         rdatasetheader_t *header, *header_next;
2625         rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2626         rdatasetheader_t *found;
2627         isc_result_t result;
2628         dns_rbtnode_t *onode;
2629
2630         /*
2631          * We only want to remember the topmost zone cut, since it's the one
2632          * that counts, so we'll just continue if we've already found a
2633          * zonecut.
2634          */
2635         if (search->zonecut != NULL)
2636                 return (DNS_R_CONTINUE);
2637
2638         found = NULL;
2639         result = DNS_R_CONTINUE;
2640         onode = search->rbtdb->origin_node;
2641
2642         NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2643                   isc_rwlocktype_read);
2644
2645         /*
2646          * Look for an NS or DNAME rdataset active in our version.
2647          */
2648         ns_header = NULL;
2649         dname_header = NULL;
2650         sigdname_header = NULL;
2651         for (header = node->data; header != NULL; header = header_next) {
2652                 header_next = header->next;
2653                 if (header->type == dns_rdatatype_ns ||
2654                     header->type == dns_rdatatype_dname ||
2655                     header->type == RBTDB_RDATATYPE_SIGDNAME) {
2656                         do {
2657                                 if (header->serial <= search->serial &&
2658                                     !IGNORE(header)) {
2659                                         /*
2660                                          * Is this a "this rdataset doesn't
2661                                          * exist" record?
2662                                          */
2663                                         if (NONEXISTENT(header))
2664                                                 header = NULL;
2665                                         break;
2666                                 } else
2667                                         header = header->down;
2668                         } while (header != NULL);
2669                         if (header != NULL) {
2670                                 if (header->type == dns_rdatatype_dname)
2671                                         dname_header = header;
2672                                 else if (header->type ==
2673                                            RBTDB_RDATATYPE_SIGDNAME)
2674                                         sigdname_header = header;
2675                                 else if (node != onode ||
2676                                          IS_STUB(search->rbtdb)) {
2677                                         /*
2678                                          * We've found an NS rdataset that
2679                                          * isn't at the origin node.  We check
2680                                          * that they're not at the origin node,
2681                                          * because otherwise we'd erroneously
2682                                          * treat the zone top as if it were
2683                                          * a delegation.
2684                                          */
2685                                         ns_header = header;
2686                                 }
2687                         }
2688                 }
2689         }
2690
2691         /*
2692          * Did we find anything?
2693          */
2694         if (dname_header != NULL) {
2695                 /*
2696                  * Note that DNAME has precedence over NS if both exist.
2697                  */
2698                 found = dname_header;
2699                 search->zonecut_sigrdataset = sigdname_header;
2700         } else if (ns_header != NULL) {
2701                 found = ns_header;
2702                 search->zonecut_sigrdataset = NULL;
2703         }
2704
2705         if (found != NULL) {
2706                 /*
2707                  * We increment the reference count on node to ensure that
2708                  * search->zonecut_rdataset will still be valid later.
2709                  */
2710                 new_reference(search->rbtdb, node);
2711                 search->zonecut = node;
2712                 search->zonecut_rdataset = found;
2713                 search->need_cleanup = ISC_TRUE;
2714                 /*
2715                  * Since we've found a zonecut, anything beneath it is
2716                  * glue and is not subject to wildcard matching, so we
2717                  * may clear search->wild.
2718                  */
2719                 search->wild = ISC_FALSE;
2720                 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2721                         /*
2722                          * If the caller does not want to find glue, then
2723                          * this is the best answer and the search should
2724                          * stop now.
2725                          */
2726                         result = DNS_R_PARTIALMATCH;
2727                 } else {
2728                         dns_name_t *zcname;
2729
2730                         /*
2731                          * The search will continue beneath the zone cut.
2732                          * This may or may not be the best match.  In case it
2733                          * is, we need to remember the node name.
2734                          */
2735                         zcname = dns_fixedname_name(&search->zonecut_name);
2736                         RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2737                                       ISC_R_SUCCESS);
2738                         search->copy_name = ISC_TRUE;
2739                 }
2740         } else {
2741                 /*
2742                  * There is no zonecut at this node which is active in this
2743                  * version.
2744                  *
2745                  * If this is a "wild" node and the caller hasn't disabled
2746                  * wildcard matching, remember that we've seen a wild node
2747                  * in case we need to go searching for wildcard matches
2748                  * later on.
2749                  */
2750                 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2751                         search->wild = ISC_TRUE;
2752         }
2753
2754         NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2755                     isc_rwlocktype_read);
2756
2757         return (result);
2758 }
2759
2760 static inline void
2761 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2762               rdatasetheader_t *header, isc_stdtime_t now,
2763               dns_rdataset_t *rdataset)
2764 {
2765         unsigned char *raw;     /* RDATASLAB */
2766
2767         /*
2768          * Caller must be holding the node reader lock.
2769          * XXXJT: technically, we need a writer lock, since we'll increment
2770          * the header count below.  However, since the actual counter value
2771          * doesn't matter, we prioritize performance here.  (We may want to
2772          * use atomic increment when available).
2773          */
2774
2775         if (rdataset == NULL)
2776                 return;
2777
2778         new_reference(rbtdb, node);
2779
2780         INSIST(rdataset->methods == NULL);      /* We must be disassociated. */
2781
2782         rdataset->methods = &rdataset_methods;
2783         rdataset->rdclass = rbtdb->common.rdclass;
2784         rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2785         rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2786         rdataset->ttl = header->rdh_ttl - now;
2787         rdataset->trust = header->trust;
2788         if (NEGATIVE(header))
2789                 rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE;
2790         if (NXDOMAIN(header))
2791                 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2792         if (OPTOUT(header))
2793                 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2794         rdataset->private1 = rbtdb;
2795         rdataset->private2 = node;
2796         raw = (unsigned char *)header + sizeof(*header);
2797         rdataset->private3 = raw;
2798         rdataset->count = header->count++;
2799         if (rdataset->count == ISC_UINT32_MAX)
2800                 rdataset->count = 0;
2801
2802         /*
2803          * Reset iterator state.
2804          */
2805         rdataset->privateuint4 = 0;
2806         rdataset->private5 = NULL;
2807
2808         /*
2809          * Add noqname proof.
2810          */
2811         rdataset->private6 = header->noqname;
2812         if (rdataset->private6 != NULL)
2813                 rdataset->attributes |=  DNS_RDATASETATTR_NOQNAME;
2814         rdataset->private7 = header->closest;
2815         if (rdataset->private7 != NULL)
2816                 rdataset->attributes |=  DNS_RDATASETATTR_CLOSEST;
2817
2818         /*
2819          * Copy out re-signing information.
2820          */
2821         if (RESIGN(header)) {
2822                 rdataset->attributes |=  DNS_RDATASETATTR_RESIGN;
2823                 rdataset->resign = header->resign;
2824         } else
2825                 rdataset->resign = 0;
2826 }
2827
2828 static inline isc_result_t
2829 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2830                  dns_name_t *foundname, dns_rdataset_t *rdataset,
2831                  dns_rdataset_t *sigrdataset)
2832 {
2833         isc_result_t result;
2834         dns_name_t *zcname;
2835         rbtdb_rdatatype_t type;
2836         dns_rbtnode_t *node;
2837
2838         /*
2839          * The caller MUST NOT be holding any node locks.
2840          */
2841
2842         node = search->zonecut;
2843         type = search->zonecut_rdataset->type;
2844
2845         /*
2846          * If we have to set foundname, we do it before anything else.
2847          * If we were to set foundname after we had set nodep or bound the
2848          * rdataset, then we'd have to undo that work if dns_name_copy()
2849          * failed.  By setting foundname first, there's nothing to undo if
2850          * we have trouble.
2851          */
2852         if (foundname != NULL && search->copy_name) {
2853                 zcname = dns_fixedname_name(&search->zonecut_name);
2854                 result = dns_name_copy(zcname, foundname, NULL);
2855                 if (result != ISC_R_SUCCESS)
2856                         return (result);
2857         }
2858         if (nodep != NULL) {
2859                 /*
2860                  * Note that we don't have to increment the node's reference
2861                  * count here because we're going to use the reference we
2862                  * already have in the search block.
2863                  */
2864                 *nodep = node;
2865                 search->need_cleanup = ISC_FALSE;
2866         }
2867         if (rdataset != NULL) {
2868                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2869                           isc_rwlocktype_read);
2870                 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2871                               search->now, rdataset);
2872                 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2873                         bind_rdataset(search->rbtdb, node,
2874                                       search->zonecut_sigrdataset,
2875                                       search->now, sigrdataset);
2876                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2877                             isc_rwlocktype_read);
2878         }
2879
2880         if (type == dns_rdatatype_dname)
2881                 return (DNS_R_DNAME);
2882         return (DNS_R_DELEGATION);
2883 }
2884
2885 static inline isc_boolean_t
2886 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2887            dns_rbtnode_t *node)
2888 {
2889         unsigned char *raw;     /* RDATASLAB */
2890         unsigned int count, size;
2891         dns_name_t ns_name;
2892         isc_boolean_t valid = ISC_FALSE;
2893         dns_offsets_t offsets;
2894         isc_region_t region;
2895         rdatasetheader_t *header;
2896
2897         /*
2898          * No additional locking is required.
2899          */
2900
2901         /*
2902          * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
2903          * if it occurs at a zone cut, but is not valid below it.
2904          */
2905         if (type == dns_rdatatype_ns) {
2906                 if (node != search->zonecut) {
2907                         return (ISC_FALSE);
2908                 }
2909         } else if (type != dns_rdatatype_a &&
2910                    type != dns_rdatatype_aaaa &&
2911                    type != dns_rdatatype_a6) {
2912                 return (ISC_FALSE);
2913         }
2914
2915         header = search->zonecut_rdataset;
2916         raw = (unsigned char *)header + sizeof(*header);
2917         count = raw[0] * 256 + raw[1];
2918 #if DNS_RDATASET_FIXED
2919         raw += 2 + (4 * count);
2920 #else
2921         raw += 2;
2922 #endif
2923
2924         while (count > 0) {
2925                 count--;
2926                 size = raw[0] * 256 + raw[1];
2927 #if DNS_RDATASET_FIXED
2928                 raw += 4;
2929 #else
2930                 raw += 2;
2931 #endif
2932                 region.base = raw;
2933                 region.length = size;
2934                 raw += size;
2935                 /*
2936                  * XXX Until we have rdata structures, we have no choice but
2937                  * to directly access the rdata format.
2938                  */
2939                 dns_name_init(&ns_name, offsets);
2940                 dns_name_fromregion(&ns_name, &region);
2941                 if (dns_name_compare(&ns_name, name) == 0) {
2942                         valid = ISC_TRUE;
2943                         break;
2944                 }
2945         }
2946
2947         return (valid);
2948 }
2949
2950 static inline isc_boolean_t
2951 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2952             dns_name_t *name)
2953 {
2954         dns_fixedname_t fnext;
2955         dns_fixedname_t forigin;
2956         dns_name_t *next;
2957         dns_name_t *origin;
2958         dns_name_t prefix;
2959         dns_rbtdb_t *rbtdb;
2960         dns_rbtnode_t *node;
2961         isc_result_t result;
2962         isc_boolean_t answer = ISC_FALSE;
2963         rdatasetheader_t *header;
2964
2965         rbtdb = search->rbtdb;
2966
2967         dns_name_init(&prefix, NULL);
2968         dns_fixedname_init(&fnext);
2969         next = dns_fixedname_name(&fnext);
2970         dns_fixedname_init(&forigin);
2971         origin = dns_fixedname_name(&forigin);
2972
2973         result = dns_rbtnodechain_next(chain, NULL, NULL);
2974         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2975                 node = NULL;
2976                 result = dns_rbtnodechain_current(chain, &prefix,
2977                                                   origin, &node);
2978                 if (result != ISC_R_SUCCESS)
2979                         break;
2980                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2981                           isc_rwlocktype_read);
2982                 for (header = node->data;
2983                      header != NULL;
2984                      header = header->next) {
2985                         if (header->serial <= search->serial &&
2986                             !IGNORE(header) && EXISTS(header))
2987                                 break;
2988                 }
2989                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2990                             isc_rwlocktype_read);
2991                 if (header != NULL)
2992                         break;
2993                 result = dns_rbtnodechain_next(chain, NULL, NULL);
2994         }
2995         if (result == ISC_R_SUCCESS)
2996                 result = dns_name_concatenate(&prefix, origin, next, NULL);
2997         if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
2998                 answer = ISC_TRUE;
2999         return (answer);
3000 }
3001
3002 static inline isc_boolean_t
3003 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
3004         dns_fixedname_t fnext;
3005         dns_fixedname_t forigin;
3006         dns_fixedname_t fprev;
3007         dns_name_t *next;
3008         dns_name_t *origin;
3009         dns_name_t *prev;
3010         dns_name_t name;
3011         dns_name_t rname;
3012         dns_name_t tname;
3013         dns_rbtdb_t *rbtdb;
3014         dns_rbtnode_t *node;
3015         dns_rbtnodechain_t chain;
3016         isc_boolean_t check_next = ISC_TRUE;
3017         isc_boolean_t check_prev = ISC_TRUE;
3018         isc_boolean_t answer = ISC_FALSE;
3019         isc_result_t result;
3020         rdatasetheader_t *header;
3021         unsigned int n;
3022
3023         rbtdb = search->rbtdb;
3024
3025         dns_name_init(&name, NULL);
3026         dns_name_init(&tname, NULL);
3027         dns_name_init(&rname, NULL);
3028         dns_fixedname_init(&fnext);
3029         next = dns_fixedname_name(&fnext);
3030         dns_fixedname_init(&fprev);
3031         prev = dns_fixedname_name(&fprev);
3032         dns_fixedname_init(&forigin);
3033         origin = dns_fixedname_name(&forigin);
3034
3035         /*
3036          * Find if qname is at or below a empty node.
3037          * Use our own copy of the chain.
3038          */
3039
3040         chain = search->chain;
3041         do {
3042                 node = NULL;
3043                 result = dns_rbtnodechain_current(&chain, &name,
3044                                                   origin, &node);
3045                 if (result != ISC_R_SUCCESS)
3046                         break;
3047                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3048                           isc_rwlocktype_read);
3049                 for (header = node->data;
3050                      header != NULL;
3051                      header = header->next) {
3052                         if (header->serial <= search->serial &&
3053                             !IGNORE(header) && EXISTS(header))
3054                                 break;
3055                 }
3056                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3057                             isc_rwlocktype_read);
3058                 if (header != NULL)
3059                         break;
3060                 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
3061         } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
3062         if (result == ISC_R_SUCCESS)
3063                 result = dns_name_concatenate(&name, origin, prev, NULL);
3064         if (result != ISC_R_SUCCESS)
3065                 check_prev = ISC_FALSE;
3066
3067         result = dns_rbtnodechain_next(&chain, NULL, NULL);
3068         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3069                 node = NULL;
3070                 result = dns_rbtnodechain_current(&chain, &name,
3071                                                   origin, &node);
3072                 if (result != ISC_R_SUCCESS)
3073                         break;
3074                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3075                           isc_rwlocktype_read);
3076                 for (header = node->data;
3077                      header != NULL;
3078                      header = header->next) {
3079                         if (header->serial <= search->serial &&
3080                             !IGNORE(header) && EXISTS(header))
3081                                 break;
3082                 }
3083                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3084                             isc_rwlocktype_read);
3085                 if (header != NULL)
3086                         break;
3087                 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3088         }
3089         if (result == ISC_R_SUCCESS)
3090                 result = dns_name_concatenate(&name, origin, next, NULL);
3091         if (result != ISC_R_SUCCESS)
3092                 check_next = ISC_FALSE;
3093
3094         dns_name_clone(qname, &rname);
3095
3096         /*
3097          * Remove the wildcard label to find the terminal name.
3098          */
3099         n = dns_name_countlabels(wname);
3100         dns_name_getlabelsequence(wname, 1, n - 1, &tname);
3101
3102         do {
3103                 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3104                     (check_next && dns_name_issubdomain(next, &rname))) {
3105                         answer = ISC_TRUE;
3106                         break;
3107                 }
3108                 /*
3109                  * Remove the left hand label.
3110                  */
3111                 n = dns_name_countlabels(&rname);
3112                 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3113         } while (!dns_name_equal(&rname, &tname));
3114         return (answer);
3115 }
3116
3117 static inline isc_result_t
3118 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3119               dns_name_t *qname)
3120 {
3121         unsigned int i, j;
3122         dns_rbtnode_t *node, *level_node, *wnode;
3123         rdatasetheader_t *header;
3124         isc_result_t result = ISC_R_NOTFOUND;
3125         dns_name_t name;
3126         dns_name_t *wname;
3127         dns_fixedname_t fwname;
3128         dns_rbtdb_t *rbtdb;
3129         isc_boolean_t done, wild, active;
3130         dns_rbtnodechain_t wchain;
3131
3132         /*
3133          * Caller must be holding the tree lock and MUST NOT be holding
3134          * any node locks.
3135          */
3136
3137         /*
3138          * Examine each ancestor level.  If the level's wild bit
3139          * is set, then construct the corresponding wildcard name and
3140          * search for it.  If the wildcard node exists, and is active in
3141          * this version, we're done.  If not, then we next check to see
3142          * if the ancestor is active in this version.  If so, then there
3143          * can be no possible wildcard match and again we're done.  If not,
3144          * continue the search.
3145          */
3146
3147         rbtdb = search->rbtdb;
3148         i = search->chain.level_matches;
3149         done = ISC_FALSE;
3150         node = *nodep;
3151         do {
3152                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3153                           isc_rwlocktype_read);
3154
3155                 /*
3156                  * First we try to figure out if this node is active in
3157                  * the search's version.  We do this now, even though we
3158                  * may not need the information, because it simplifies the
3159                  * locking and code flow.
3160                  */
3161                 for (header = node->data;
3162                      header != NULL;
3163                      header = header->next) {
3164                         if (header->serial <= search->serial &&
3165                             !IGNORE(header) && EXISTS(header))
3166                                 break;
3167                 }
3168                 if (header != NULL)
3169                         active = ISC_TRUE;
3170                 else
3171                         active = ISC_FALSE;
3172
3173                 if (node->wild)
3174                         wild = ISC_TRUE;
3175                 else
3176                         wild = ISC_FALSE;
3177
3178                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3179                             isc_rwlocktype_read);
3180
3181                 if (wild) {
3182                         /*
3183                          * Construct the wildcard name for this level.
3184                          */
3185                         dns_name_init(&name, NULL);
3186                         dns_rbt_namefromnode(node, &name);
3187                         dns_fixedname_init(&fwname);
3188                         wname = dns_fixedname_name(&fwname);
3189                         result = dns_name_concatenate(dns_wildcardname, &name,
3190                                                       wname, NULL);
3191                         j = i;
3192                         while (result == ISC_R_SUCCESS && j != 0) {
3193                                 j--;
3194                                 level_node = search->chain.levels[j];
3195                                 dns_name_init(&name, NULL);
3196                                 dns_rbt_namefromnode(level_node, &name);
3197                                 result = dns_name_concatenate(wname,
3198                                                               &name,
3199                                                               wname,
3200                                                               NULL);
3201                         }
3202                         if (result != ISC_R_SUCCESS)
3203                                 break;
3204
3205                         wnode = NULL;
3206                         dns_rbtnodechain_init(&wchain, NULL);
3207                         result = dns_rbt_findnode(rbtdb->tree, wname,
3208                                                   NULL, &wnode, &wchain,
3209                                                   DNS_RBTFIND_EMPTYDATA,
3210                                                   NULL, NULL);
3211                         if (result == ISC_R_SUCCESS) {
3212                                 nodelock_t *lock;
3213
3214                                 /*
3215                                  * We have found the wildcard node.  If it
3216                                  * is active in the search's version, we're
3217                                  * done.
3218                                  */
3219                                 lock = &rbtdb->node_locks[wnode->locknum].lock;
3220                                 NODE_LOCK(lock, isc_rwlocktype_read);
3221                                 for (header = wnode->data;
3222                                      header != NULL;
3223                                      header = header->next) {
3224                                         if (header->serial <= search->serial &&
3225                                             !IGNORE(header) && EXISTS(header))
3226                                                 break;
3227                                 }
3228                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3229                                 if (header != NULL ||
3230                                     activeempty(search, &wchain, wname)) {
3231                                         if (activeemtpynode(search, qname,
3232                                                             wname)) {
3233                                                 return (ISC_R_NOTFOUND);
3234                                         }
3235                                         /*
3236                                          * The wildcard node is active!
3237                                          *
3238                                          * Note: result is still ISC_R_SUCCESS
3239                                          * so we don't have to set it.
3240                                          */
3241                                         *nodep = wnode;
3242                                         break;
3243                                 }
3244                         } else if (result != ISC_R_NOTFOUND &&
3245                                    result != DNS_R_PARTIALMATCH) {
3246                                 /*
3247                                  * An error has occurred.  Bail out.
3248                                  */
3249                                 break;
3250                         }
3251                 }
3252
3253                 if (active) {
3254                         /*
3255                          * The level node is active.  Any wildcarding
3256                          * present at higher levels has no
3257                          * effect and we're done.
3258                          */
3259                         result = ISC_R_NOTFOUND;
3260                         break;
3261                 }
3262
3263                 if (i > 0) {
3264                         i--;
3265                         node = search->chain.levels[i];
3266                 } else
3267                         done = ISC_TRUE;
3268         } while (!done);
3269
3270         return (result);
3271 }
3272
3273 static isc_boolean_t
3274 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3275 {
3276         dns_rdata_t rdata = DNS_RDATA_INIT;
3277         dns_rdata_nsec3_t nsec3;
3278         unsigned char *raw;                     /* RDATASLAB */
3279         unsigned int rdlen, count;
3280         isc_region_t region;
3281         isc_result_t result;
3282
3283         REQUIRE(header->type == dns_rdatatype_nsec3);
3284
3285         raw = (unsigned char *)header + sizeof(*header);
3286         count = raw[0] * 256 + raw[1]; /* count */
3287 #if DNS_RDATASET_FIXED
3288         raw += count * 4 + 2;
3289 #else
3290         raw += 2;
3291 #endif
3292         while (count-- > 0) {
3293                 rdlen = raw[0] * 256 + raw[1];
3294 #if DNS_RDATASET_FIXED
3295                 raw += 4;
3296 #else
3297                 raw += 2;
3298 #endif
3299                 region.base = raw;
3300                 region.length = rdlen;
3301                 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3302                                      dns_rdatatype_nsec3, &region);
3303                 raw += rdlen;
3304                 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3305                 INSIST(result == ISC_R_SUCCESS);
3306                 if (nsec3.hash == search->rbtversion->hash &&
3307                     nsec3.iterations == search->rbtversion->iterations &&
3308                     nsec3.salt_length == search->rbtversion->salt_length &&
3309                     memcmp(nsec3.salt, search->rbtversion->salt,
3310                            nsec3.salt_length) == 0)
3311                         return (ISC_TRUE);
3312                 dns_rdata_reset(&rdata);
3313         }
3314         return (ISC_FALSE);
3315 }
3316
3317 /*
3318  * Find node of the NSEC/NSEC3 record that is 'name'.
3319  */
3320 static inline isc_result_t
3321 previous_closest_nsec(dns_rdatatype_t type, rbtdb_search_t *search,
3322                     dns_name_t *name, dns_name_t *origin,
3323                     dns_rbtnode_t **nodep, dns_rbtnodechain_t *nsecchain,
3324                     isc_boolean_t *firstp)
3325 {
3326         dns_fixedname_t ftarget;
3327         dns_name_t *target;
3328         dns_rbtnode_t *nsecnode;
3329         isc_result_t result;
3330
3331         REQUIRE(nodep != NULL && *nodep == NULL);
3332
3333         if (type == dns_rdatatype_nsec3) {
3334                 result = dns_rbtnodechain_prev(&search->chain, NULL, NULL);
3335                 if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN)
3336                         return (result);
3337                 result = dns_rbtnodechain_current(&search->chain, name, origin,
3338                                                   nodep);
3339                 return (result);
3340         }
3341
3342         dns_fixedname_init(&ftarget);
3343         target = dns_fixedname_name(&ftarget);
3344
3345         for (;;) {
3346                 if (*firstp) {
3347                         /*
3348                          * Construct the name of the second node to check.
3349                          * It is the first node sought in the NSEC tree.
3350                          */
3351                         *firstp = ISC_FALSE;
3352                         dns_rbtnodechain_init(nsecchain, NULL);
3353                         result = dns_name_concatenate(name, origin,
3354                                                       target, NULL);
3355                         if (result != ISC_R_SUCCESS)
3356                                 return (result);
3357                         nsecnode = NULL;
3358                         result = dns_rbt_findnode(search->rbtdb->nsec,
3359                                                   target, NULL,
3360                                                   &nsecnode, nsecchain,
3361                                                   DNS_RBTFIND_NOOPTIONS,
3362                                                   NULL, NULL);
3363                         if (result == ISC_R_SUCCESS) {
3364                                 /*
3365                                  * Since this was the first loop, finding the
3366                                  * name in the NSEC tree implies that the first
3367                                  * node checked in the main tree had an
3368                                  * unacceptable NSEC record.
3369                                  * Try the previous node in the NSEC tree.
3370                                  */
3371                                 result = dns_rbtnodechain_prev(nsecchain,
3372                                                                name, origin);
3373                                 if (result == DNS_R_NEWORIGIN)
3374                                         result = ISC_R_SUCCESS;
3375                         } else if (result == ISC_R_NOTFOUND ||
3376                                    result == DNS_R_PARTIALMATCH) {
3377                                 result = dns_rbtnodechain_current(nsecchain,
3378                                                         name, origin, NULL);
3379                                 if (result == ISC_R_NOTFOUND)
3380                                         result = ISC_R_NOMORE;
3381                         }
3382                 } else {
3383                         /*
3384                          * This is a second or later trip through the auxiliary
3385                          * tree for the name of a third or earlier NSEC node in
3386                          * the main tree.  Previous trips through the NSEC tree
3387                          * must have found nodes in the main tree with NSEC
3388                          * records.  Perhaps they lacked signature records.
3389                          */
3390                         result = dns_rbtnodechain_prev(nsecchain, name, origin);
3391                         if (result == DNS_R_NEWORIGIN)
3392                                 result = ISC_R_SUCCESS;
3393                 }
3394                 if (result != ISC_R_SUCCESS)
3395                         return (result);
3396
3397                 /*
3398                  * Construct the name to seek in the main tree.
3399                  */
3400                 result = dns_name_concatenate(name, origin, target, NULL);
3401                 if (result != ISC_R_SUCCESS)
3402                         return (result);
3403
3404                 *nodep = NULL;
3405                 result = dns_rbt_findnode(search->rbtdb->tree, target, NULL,
3406                                           nodep, &search->chain,
3407                                           DNS_RBTFIND_NOOPTIONS, NULL, NULL);
3408                 if (result == ISC_R_SUCCESS)
3409                         return (result);
3410
3411                 /*
3412                  * There should always be a node in the main tree with the
3413                  * same name as the node in the auxiliary NSEC tree, except for
3414                  * nodes in the auxiliary tree that are awaiting deletion.
3415                  */
3416                 if (result != DNS_R_PARTIALMATCH && result != ISC_R_NOTFOUND) {
3417                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
3418                                       DNS_LOGMODULE_CACHE, ISC_LOG_ERROR,
3419                                       "previous_closest_nsec(): %s",
3420                                       isc_result_totext(result));
3421                         return (DNS_R_BADDB);
3422                 }
3423         }
3424 }
3425
3426 /*
3427  * Find the NSEC/NSEC3 which is or before the current point on the
3428  * search chain.  For NSEC3 records only NSEC3 records that match the
3429  * current NSEC3PARAM record are considered.
3430  */
3431 static inline isc_result_t
3432 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3433                   dns_name_t *foundname, dns_rdataset_t *rdataset,
3434                   dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3435                   dns_db_secure_t secure)
3436 {
3437         dns_rbtnode_t *node, *prevnode;
3438         rdatasetheader_t *header, *header_next, *found, *foundsig;
3439         dns_rbtnodechain_t nsecchain;
3440         isc_boolean_t empty_node;
3441         isc_result_t result;
3442         dns_fixedname_t fname, forigin;
3443         dns_name_t *name, *origin;
3444         dns_rdatatype_t type;
3445         rbtdb_rdatatype_t sigtype;
3446         isc_boolean_t wraps;
3447         isc_boolean_t first = ISC_TRUE;
3448         isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3449
3450         if (tree == search->rbtdb->nsec3) {
3451                 type = dns_rdatatype_nsec3;
3452                 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3453                 wraps = ISC_TRUE;
3454         } else {
3455                 type = dns_rdatatype_nsec;
3456                 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3457                 wraps = ISC_FALSE;
3458         }
3459
3460         /*
3461          * Use the auxiliary tree only starting with the second node in the
3462          * hope that the original node will be right much of the time.
3463          */
3464         dns_fixedname_init(&fname);
3465         name = dns_fixedname_name(&fname);
3466         dns_fixedname_init(&forigin);
3467         origin = dns_fixedname_name(&forigin);
3468  again:
3469         node = NULL;
3470         prevnode = NULL;
3471         result = dns_rbtnodechain_current(&search->chain, name, origin, &node);
3472         if (result != ISC_R_SUCCESS)
3473                 return (result);
3474         do {
3475                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3476                           isc_rwlocktype_read);
3477                 found = NULL;
3478                 foundsig = NULL;
3479                 empty_node = ISC_TRUE;
3480                 for (header = node->data;
3481                      header != NULL;
3482                      header = header_next) {
3483                         header_next = header->next;
3484                         /*
3485                          * Look for an active, extant NSEC or RRSIG NSEC.
3486                          */
3487                         do {
3488                                 if (header->serial <= search->serial &&
3489                                     !IGNORE(header)) {
3490                                         /*
3491                                          * Is this a "this rdataset doesn't
3492                                          * exist" record?
3493                                          */
3494                                         if (NONEXISTENT(header))
3495                                                 header = NULL;
3496                                         break;
3497                                 } else
3498                                         header = header->down;
3499                         } while (header != NULL);
3500                         if (header != NULL) {
3501                                 /*
3502                                  * We now know that there is at least one
3503                                  * active rdataset at this node.
3504                                  */
3505                                 empty_node = ISC_FALSE;
3506                                 if (header->type == type) {
3507                                         found = header;
3508                                         if (foundsig != NULL)
3509                                                 break;
3510                                 } else if (header->type == sigtype) {
3511                                         foundsig = header;
3512                                         if (found != NULL)
3513                                                 break;
3514                                 }
3515                         }
3516                 }
3517                 if (!empty_node) {
3518                         if (found != NULL && search->rbtversion->havensec3 &&
3519                             found->type == dns_rdatatype_nsec3 &&
3520                             !matchparams(found, search)) {
3521                                 empty_node = ISC_TRUE;
3522                                 found = NULL;
3523                                 foundsig = NULL;
3524                                 result = previous_closest_nsec(type, search,
3525                                                                name, origin,
3526                                                                &prevnode, NULL,
3527                                                                NULL);
3528                         } else if (found != NULL &&
3529                                    (foundsig != NULL || !need_sig)) {
3530                                 /*
3531                                  * We've found the right NSEC/NSEC3 record.
3532                                  *
3533                                  * Note: for this to really be the right
3534                                  * NSEC record, it's essential that the NSEC
3535                                  * records of any nodes obscured by a zone
3536                                  * cut have been removed; we assume this is
3537                                  * the case.
3538                                  */
3539                                 result = dns_name_concatenate(name, origin,
3540                                                               foundname, NULL);
3541                                 if (result == ISC_R_SUCCESS) {
3542                                         if (nodep != NULL) {
3543                                                 new_reference(search->rbtdb,
3544                                                               node);
3545                                                 *nodep = node;
3546                                         }
3547                                         bind_rdataset(search->rbtdb, node,
3548                                                       found, search->now,
3549                                                       rdataset);
3550                                         if (foundsig != NULL)
3551                                                 bind_rdataset(search->rbtdb,
3552                                                               node,
3553                                                               foundsig,
3554                                                               search->now,
3555                                                               sigrdataset);
3556                                 }
3557                         } else if (found == NULL && foundsig == NULL) {
3558                                 /*
3559                                  * This node is active, but has no NSEC or
3560                                  * RRSIG NSEC.  That means it's glue or
3561                                  * other obscured zone data that isn't
3562                                  * relevant for our search.  Treat the
3563                                  * node as if it were empty and keep looking.
3564                                  */
3565                                 empty_node = ISC_TRUE;
3566                                 result = previous_closest_nsec(type, search,
3567                                                                name, origin,
3568                                                                &prevnode,
3569                                                                &nsecchain,
3570                                                                &first);
3571                         } else {
3572                                 /*
3573                                  * We found an active node, but either the
3574                                  * NSEC or the RRSIG NSEC is missing.  This
3575                                  * shouldn't happen.
3576                                  */
3577                                 result = DNS_R_BADDB;
3578                         }
3579                 } else {
3580                         /*
3581                          * This node isn't active.  We've got to keep
3582                          * looking.
3583                          */
3584                         result = previous_closest_nsec(type, search,
3585                                                        name, origin, &prevnode,
3586                                                        &nsecchain, &first);
3587                 }
3588                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3589                             isc_rwlocktype_read);
3590                 node = prevnode;
3591                 prevnode = NULL;
3592         } while (empty_node && result == ISC_R_SUCCESS);
3593
3594         if (!first)
3595                 dns_rbtnodechain_invalidate(&nsecchain);
3596
3597         if (result == ISC_R_NOMORE && wraps) {
3598                 result = dns_rbtnodechain_last(&search->chain, tree,
3599                                                NULL, NULL);
3600                 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3601                         wraps = ISC_FALSE;
3602                         goto again;
3603                 }
3604         }
3605
3606         /*
3607          * If the result is ISC_R_NOMORE, then we got to the beginning of
3608          * the database and didn't find a NSEC record.  This shouldn't
3609          * happen.
3610          */
3611         if (result == ISC_R_NOMORE)
3612                 result = DNS_R_BADDB;
3613
3614         return (result);
3615 }
3616
/*
 * zone_find: look up 'name' / 'type' in a zone database.
 *
 * The search runs against search.rbtdb->nsec3 when DNS_DBFIND_FORCENSEC3
 * is set in 'options', otherwise against search.rbtdb->tree, using
 * 'version' (or the current version, attached here and closed again
 * before returning, when 'version' is NULL).  'now' is ignored.
 *
 * The tree lock is held for reading from the start of the search until
 * the tree_exit label; the matched node's lock is held for reading while
 * its rdataset headers are examined.
 *
 * Results assigned in this function:
 *   ISC_R_SUCCESS    - the requested type was found (this includes NSEC,
 *                      NSEC3 or KEY found at the zone cut node itself)
 *   DNS_R_CNAME      - a CNAME was found instead of the requested type
 *   DNS_R_GLUE       - data was found at or beneath a zone cut
 *   DNS_R_ZONECUT    - an ANY query hit the zone cut node itself
 *   DNS_R_NXRRSET    - the node has active data but not the wanted type
 *   DNS_R_EMPTYNAME / DNS_R_NXDOMAIN
 *                    - no usable node; chosen by what activeempty()
 *                      returns
 *   DNS_R_EMPTYWILD  - a wildcard matched without the wanted type and
 *                      without NSEC/RRSIG NSEC at the node, and
 *                      find_closest_nsec() succeeded
 *   DNS_R_BADDB      - an NSEC (or its RRSIG) that is required is missing
 * Results of dns_rbt_findnode(), find_wildcard(), dns_name_copy(),
 * find_closest_nsec() and setup_delegation() may also be returned as is.
 *
 * When the name matched a wildcard, DNS_NAMEATTR_WILDCARD is set on
 * 'foundname' on the DNS_R_NXRRSET path and on the found-data path.
 */
3617 static isc_result_t
3618 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3619           dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3620           dns_dbnode_t **nodep, dns_name_t *foundname,
3621           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3622 {
3623         dns_rbtnode_t *node = NULL;
3624         isc_result_t result;
3625         rbtdb_search_t search;
3626         isc_boolean_t cname_ok = ISC_TRUE;
3627         isc_boolean_t close_version = ISC_FALSE;
3628         isc_boolean_t maybe_zonecut = ISC_FALSE;
3629         isc_boolean_t at_zonecut = ISC_FALSE;
3630         isc_boolean_t wild;
3631         isc_boolean_t empty_node;
3632         rdatasetheader_t *header, *header_next, *found, *nsecheader;
3633         rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3634         rbtdb_rdatatype_t sigtype;
3635         isc_boolean_t active;
3636         dns_rbtnodechain_t chain;
3637         nodelock_t *lock;
3638         dns_rbt_t *tree;
3639
3640         search.rbtdb = (dns_rbtdb_t *)db;
3641
3642         REQUIRE(VALID_RBTDB(search.rbtdb));
3643
3644         /*
3645          * We don't care about 'now'.
3646          */
3647         UNUSED(now);
3648
3649         /*
3650          * If the caller didn't supply a version, attach to the current
3651          * version.
3652          */
3653         if (version == NULL) {
3654                 currentversion(db, &version);
3655                 close_version = ISC_TRUE;
3656         }
3657
3658         search.rbtversion = version;
3659         search.serial = search.rbtversion->serial;
3660         search.options = options;
3661         search.copy_name = ISC_FALSE;
3662         search.need_cleanup = ISC_FALSE;
3663         search.wild = ISC_FALSE;
3664         search.zonecut = NULL;
3665         dns_fixedname_init(&search.zonecut_name);
3666         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3667         search.now = 0;
3668
3669         /*
3670          * 'wild' will be true iff. we've matched a wildcard.
3671          */
3672         wild = ISC_FALSE;
3673
3674         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3675
3676         /*
3677          * Search down from the root of the tree.  If, while going down, we
3678          * encounter a callback node, zone_zonecut_callback() will search the
3679          * rdatasets at the zone cut for active DNAME or NS rdatasets.
3680          */
3681         tree =  (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3682                                                          search.rbtdb->tree;
3683         result = dns_rbt_findnode(tree, name, foundname, &node,
3684                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
3685                                   zone_zonecut_callback, &search);
3686
3687         if (result == DNS_R_PARTIALMATCH) {
             /*
              * This label is also reached by goto from the rdataset scan
              * below (empty node, or NSEC3 parameters that do not match);
              * those jumps release the node lock first.
              */
3688         partial_match:
3689                 if (search.zonecut != NULL) {
3690                     result = setup_delegation(&search, nodep, foundname,
3691                                               rdataset, sigrdataset);
3692                     goto tree_exit;
3693                 }
3694
3695                 if (search.wild) {
3696                         /*
3697                          * At least one of the levels in the search chain
3698                          * potentially has a wildcard.  For each such level,
3699                          * we must see if there's a matching wildcard active
3700                          * in the current version.
3701                          */
3702                         result = find_wildcard(&search, &node, name);
3703                         if (result == ISC_R_SUCCESS) {
3704                                 result = dns_name_copy(name, foundname, NULL);
3705                                 if (result != ISC_R_SUCCESS)
3706                                         goto tree_exit;
3707                                 wild = ISC_TRUE;
3708                                 goto found;
3709                         }
3710                         else if (result != ISC_R_NOTFOUND)
3711                                 goto tree_exit;
3712                 }
3713
3714                 chain = search.chain;
3715                 active = activeempty(&search, &chain, name);
3716
3717                 /*
3718                  * If we're here, then the name does not exist, is not
3719                  * beneath a zonecut, and there's no matching wildcard.
3720                  */
3721                 if ((search.rbtversion->secure == dns_db_secure &&
3722                      !search.rbtversion->havensec3) ||
3723                     (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3724                     (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3725                 {
3726                         result = find_closest_nsec(&search, nodep, foundname,
3727                                                    rdataset, sigrdataset, tree,
3728                                                    search.rbtversion->secure);
3729                         if (result == ISC_R_SUCCESS)
3730                                 result = active ? DNS_R_EMPTYNAME :
3731                                                   DNS_R_NXDOMAIN;
3732                 } else
3733                         result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3734                 goto tree_exit;
3735         } else if (result != ISC_R_SUCCESS)
3736                 goto tree_exit;
3737
3738  found:
3739         /*
3740          * We have found a node whose name is the desired name, or we
3741          * have matched a wildcard.
3742          */
3743
3744         if (search.zonecut != NULL) {
3745                 /*
3746                  * If we're beneath a zone cut, we don't want to look for
3747                  * CNAMEs because they're not legitimate zone glue.
3748                  */
3749                 cname_ok = ISC_FALSE;
3750         } else {
3751                 /*
3752                  * The node may be a zone cut itself.  If it might be one,
3753                  * make sure we check for it later.
3754                  *
3755                  * DS records live above the zone cut in ordinary zone so
3756                  * we want to ignore any referral.
3757                  *
3758                  * Stub zones don't have anything "above" the delegation so
3759                  * we always return a referral.
3760                  */
3761                 if (node->find_callback &&
3762                     ((node != search.rbtdb->origin_node &&
3763                       !dns_rdatatype_atparent(type)) ||
3764                      IS_STUB(search.rbtdb)))
3765                         maybe_zonecut = ISC_TRUE;
3766         }
3767
3768         /*
3769          * Certain DNSSEC types are not subject to CNAME matching
3770          * (RFC4035, section 2.5 and RFC3007).
3771          *
3772          * We don't check for RRSIG, because we don't store RRSIG records
3773          * directly.
3774          */
3775         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3776                 cname_ok = ISC_FALSE;
3777
3778         /*
3779          * We now go looking for rdata...
3780          */
3781
3782         lock = &search.rbtdb->node_locks[node->locknum].lock;
3783         NODE_LOCK(lock, isc_rwlocktype_read);
3784
3785         found = NULL;
3786         foundsig = NULL;
3787         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3788         nsecheader = NULL;
3789         nsecsig = NULL;
3790         cnamesig = NULL;
3791         empty_node = ISC_TRUE;
3792         for (header = node->data; header != NULL; header = header_next) {
3793                 header_next = header->next;
3794                 /*
3795                  * Look for an active, extant rdataset.
3796                  */
3797                 do {
3798                         if (header->serial <= search.serial &&
3799                             !IGNORE(header)) {
3800                                 /*
3801                                  * Is this a "this rdataset doesn't
3802                                  * exist" record?
3803                                  */
3804                                 if (NONEXISTENT(header))
3805                                         header = NULL;
3806                                 break;
3807                         } else
3808                                 header = header->down;
3809                 } while (header != NULL);
3810                 if (header != NULL) {
3811                         /*
3812                          * We now know that there is at least one active
3813                          * rdataset at this node.
3814                          */
3815                         empty_node = ISC_FALSE;
3816
3817                         /*
3818                          * Do special zone cut handling, if requested.
3819                          */
3820                         if (maybe_zonecut &&
3821                             header->type == dns_rdatatype_ns) {
3822                                 /*
3823                                  * We increment the reference count on node to
3824                                  * ensure that search->zonecut_rdataset will
3825                                  * still be valid later.
3826                                  */
3827                                 new_reference(search.rbtdb, node);
3828                                 search.zonecut = node;
3829                                 search.zonecut_rdataset = header;
3830                                 search.zonecut_sigrdataset = NULL;
3831                                 search.need_cleanup = ISC_TRUE;
3832                                 maybe_zonecut = ISC_FALSE;
3833                                 at_zonecut = ISC_TRUE;
3834                                 /*
3835                                  * It is not clear if KEY should still be
3836                                  * allowed at the parent side of the zone
3837                                  * cut or not.  It is needed for RFC3007
3838                                  * validated updates.
3839                                  */
3840                                 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3841                                     && type != dns_rdatatype_nsec
3842                                     && type != dns_rdatatype_key) {
3843                                         /*
3844                                          * Glue is not OK, but any answer we
3845                                          * could return would be glue.  Return
3846                                          * the delegation.
3847                                          */
3848                                         found = NULL;
3849                                         break;
3850                                 }
3851                                 if (found != NULL && foundsig != NULL)
3852                                         break;
3853                         }
3854
3855
3856                         /*
3857                          * If the NSEC3 record doesn't match the chain
3858                          * we are using behave as if it isn't here.
3859                          */
3860                         if (header->type == dns_rdatatype_nsec3 &&
3861                            !matchparams(header, &search)) {
3862                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3863                                 goto partial_match;
3864                         }
3865                         /*
3866                          * If we found a type we were looking for,
3867                          * remember it.
3868                          */
3869                         if (header->type == type ||
3870                             type == dns_rdatatype_any ||
3871                             (header->type == dns_rdatatype_cname &&
3872                              cname_ok)) {
3873                                 /*
3874                                  * We've found the answer!
3875                                  */
3876                                 found = header;
3877                                 if (header->type == dns_rdatatype_cname &&
3878                                     cname_ok) {
3879                                         /*
3880                                          * We may be finding a CNAME instead
3881                                          * of the desired type.
3882                                          *
3883                                          * If we've already got the CNAME RRSIG,
3884                                          * use it, otherwise change sigtype
3885                                          * so that we find it.
3886                                          */
3887                                         if (cnamesig != NULL)
3888                                                 foundsig = cnamesig;
3889                                         else
3890                                                 sigtype =
3891                                                     RBTDB_RDATATYPE_SIGCNAME;
3892                                 }
3893                                 /*
3894                                  * If we've got all we need, end the search.
3895                                  */
3896                                 if (!maybe_zonecut && foundsig != NULL)
3897                                         break;
3898                         } else if (header->type == sigtype) {
3899                                 /*
3900                                  * We've found the RRSIG rdataset for our
3901                                  * target type.  Remember it.
3902                                  */
3903                                 foundsig = header;
3904                                 /*
3905                                  * If we've got all we need, end the search.
3906                                  */
3907                                 if (!maybe_zonecut && found != NULL)
3908                                         break;
3909                         } else if (header->type == dns_rdatatype_nsec &&
3910                                    !search.rbtversion->havensec3) {
3911                                 /*
3912                                  * Remember a NSEC rdataset even if we're
3913                                  * not specifically looking for it, because
3914                                  * we might need it later.
3915                                  */
3916                                 nsecheader = header;
3917                         } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3918                                    !search.rbtversion->havensec3) {
3919                                 /*
3920                                  * If we need the NSEC rdataset, we'll also
3921                                  * need its signature.
3922                                  */
3923                                 nsecsig = header;
3924                         } else if (cname_ok &&
3925                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
3926                                 /*
3927                                  * If we get a CNAME match, we'll also need
3928                                  * its signature.
3929                                  */
3930                                 cnamesig = header;
3931                         }
3932                 }
3933         }
3934
3935         if (empty_node) {
3936                 /*
3937                  * We have an exact match for the name, but there are no
3938                  * active rdatasets in the desired version.  That means that
3939                  * this node doesn't exist in the desired version, and that
3940                  * we really have a partial match.
3941                  */
3942                 if (!wild) {
3943                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3944                         goto partial_match;
3945                 }
3946         }
3947
3948         /*
3949          * If we didn't find what we were looking for...
3950          */
3951         if (found == NULL) {
3952                 if (search.zonecut != NULL) {
3953                         /*
3954                          * We were trying to find glue at a node beneath a
3955                          * zone cut, but didn't.
3956                          *
3957                          * Return the delegation.
3958                          */
3959                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3960                         result = setup_delegation(&search, nodep, foundname,
3961                                                   rdataset, sigrdataset);
3962                         goto tree_exit;
3963                 }
3964                 /*
3965                  * The desired type doesn't exist.
3966                  */
3967                 result = DNS_R_NXRRSET;
3968                 if (search.rbtversion->secure == dns_db_secure &&
3969                     !search.rbtversion->havensec3 &&
3970                     (nsecheader == NULL || nsecsig == NULL)) {
3971                         /*
3972                          * The zone is secure but there's no NSEC,
3973                          * or the NSEC has no signature!
3974                          */
3975                         if (!wild) {
3976                                 result = DNS_R_BADDB;
3977                                 goto node_exit;
3978                         }
3979
3980                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3981                         result = find_closest_nsec(&search, nodep, foundname,
3982                                                    rdataset, sigrdataset,
3983                                                    search.rbtdb->tree,
3984                                                    search.rbtversion->secure);
3985                         if (result == ISC_R_SUCCESS)
3986                                 result = DNS_R_EMPTYWILD;
3987                         goto tree_exit;
3988                 }
3989                 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3990                     nsecheader == NULL)
3991                 {
3992                         /*
3993                          * There's no NSEC record, and we were told
3994                          * to find one.
3995                          */
3996                         result = DNS_R_BADDB;
3997                         goto node_exit;
3998                 }
3999                 if (nodep != NULL) {
4000                         new_reference(search.rbtdb, node);
4001                         *nodep = node;
4002                 }
4003                 if ((search.rbtversion->secure == dns_db_secure &&
4004                      !search.rbtversion->havensec3) ||
4005                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
4006                 {
4007                         bind_rdataset(search.rbtdb, node, nsecheader,
4008                                       0, rdataset);
4009                         if (nsecsig != NULL)
4010                                 bind_rdataset(search.rbtdb, node,
4011                                               nsecsig, 0, sigrdataset);
4012                 }
4013                 if (wild)
4014                         foundname->attributes |= DNS_NAMEATTR_WILDCARD;
4015                 goto node_exit;
4016         }
4017
4018         /*
4019          * We found what we were looking for, or we found a CNAME.
4020          */
4021
4022         if (type != found->type &&
4023             type != dns_rdatatype_any &&
4024             found->type == dns_rdatatype_cname) {
4025                 /*
4026                  * We weren't doing an ANY query and we found a CNAME instead
4027                  * of the type we were looking for, so we need to indicate
4028                  * that result to the caller.
4029                  */
4030                 result = DNS_R_CNAME;
4031         } else if (search.zonecut != NULL) {
4032                 /*
4033                  * If we're beneath a zone cut, we must indicate that the
4034                  * result is glue, unless we're actually at the zone cut
4035                  * and the type is NSEC or KEY.
4036                  */
4037                 if (search.zonecut == node) {
4038                         /*
4039                          * It is not clear if KEY should still be
4040                          * allowed at the parent side of the zone
4041                          * cut or not.  It is needed for RFC3007
4042                          * validated updates.
4043                          */
4044                         if (type == dns_rdatatype_nsec ||
4045                             type == dns_rdatatype_nsec3 ||
4046                             type == dns_rdatatype_key)
4047                                 result = ISC_R_SUCCESS;
4048                         else if (type == dns_rdatatype_any)
4049                                 result = DNS_R_ZONECUT;
4050                         else
4051                                 result = DNS_R_GLUE;
4052                 } else
4053                         result = DNS_R_GLUE;
4054                 /*
4055                  * We might have found data that isn't glue, but was occluded
4056                  * by a dynamic update.  If the caller cares about this, they
4057                  * will have told us to validate glue.
4058                  *
4059                  * XXX We should cache the glue validity state!
4060                  */
4061                 if (result == DNS_R_GLUE &&
4062                     (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
4063                     !valid_glue(&search, foundname, type, node)) {
4064                         NODE_UNLOCK(lock, isc_rwlocktype_read);
4065                         result = setup_delegation(&search, nodep, foundname,
4066                                                   rdataset, sigrdataset);
4067                     goto tree_exit;
4068                 }
4069         } else {
4070                 /*
4071                  * An ordinary successful query!
4072                  */
4073                 result = ISC_R_SUCCESS;
4074         }
4075
             /*
              * When at_zonecut is set, a reference on 'node' was already
              * taken in the scan loop above (search.zonecut == node).  That
              * reference is handed to the caller through *nodep, so the
              * zone cut cleanup at tree_exit is cancelled rather than a
              * second reference being taken.
              */
4076         if (nodep != NULL) {
4077                 if (!at_zonecut)
4078                         new_reference(search.rbtdb, node);
4079                 else
4080                         search.need_cleanup = ISC_FALSE;
4081                 *nodep = node;
4082         }
4083
4084         if (type != dns_rdatatype_any) {
4085                 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
4086                 if (foundsig != NULL)
4087                         bind_rdataset(search.rbtdb, node, foundsig, 0,
4088                                       sigrdataset);
4089         }
4090
4091         if (wild)
4092                 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
4093
4094  node_exit:
4095         NODE_UNLOCK(lock, isc_rwlocktype_read);
4096
4097  tree_exit:
4098         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4099
4100         /*
4101          * If we found a zonecut but aren't going to use it, we have to
4102          * let go of it.
4103          */
4104         if (search.need_cleanup) {
4105                 node = search.zonecut;
4106                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4107
4108                 NODE_LOCK(lock, isc_rwlocktype_read);
4109                 decrement_reference(search.rbtdb, node, 0,
4110                                     isc_rwlocktype_read, isc_rwlocktype_none,
4111                                     ISC_FALSE);
4112                 NODE_UNLOCK(lock, isc_rwlocktype_read);
4113         }
4114
4115         if (close_version)
4116                 closeversion(db, &version, ISC_FALSE);
4117
4118         dns_rbtnodechain_reset(&search.chain);
4119
4120         return (result);
4121 }
4122
4123 static isc_result_t
4124 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4125                  isc_stdtime_t now, dns_dbnode_t **nodep,
4126                  dns_name_t *foundname,
4127                  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4128 {
4129         UNUSED(db);
4130         UNUSED(name);
4131         UNUSED(options);
4132         UNUSED(now);
4133         UNUSED(nodep);
4134         UNUSED(foundname);
4135         UNUSED(rdataset);
4136         UNUSED(sigrdataset);
4137
4138         FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
4139
4140         /* NOTREACHED */
4141         return (ISC_R_NOTIMPLEMENTED);
4142 }
4143
4144 static isc_result_t
4145 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
4146         rbtdb_search_t *search = arg;
4147         rdatasetheader_t *header, *header_prev, *header_next;
4148         rdatasetheader_t *dname_header, *sigdname_header;
4149         isc_result_t result;
4150         nodelock_t *lock;
4151         isc_rwlocktype_t locktype;
4152
4153         /* XXX comment */
4154
4155         REQUIRE(search->zonecut == NULL);
4156
4157         /*
4158          * Keep compiler silent.
4159          */
4160         UNUSED(name);
4161
4162         lock = &(search->rbtdb->node_locks[node->locknum].lock);
4163         locktype = isc_rwlocktype_read;
4164         NODE_LOCK(lock, locktype);
4165
4166         /*
4167          * Look for a DNAME or RRSIG DNAME rdataset.
4168          */
4169         dname_header = NULL;
4170         sigdname_header = NULL;
4171         header_prev = NULL;
4172         for (header = node->data; header != NULL; header = header_next) {
4173                 header_next = header->next;
4174                 if (header->rdh_ttl <= search->now) {
4175                         /*
4176                          * This rdataset is stale.  If no one else is
4177                          * using the node, we can clean it up right
4178                          * now, otherwise we mark it as stale, and
4179                          * the node as dirty, so it will get cleaned
4180                          * up later.
4181                          */
4182                         if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
4183                             (locktype == isc_rwlocktype_write ||
4184                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4185                                 /*
4186                                  * We update the node's status only when we
4187                                  * can get write access; otherwise, we leave
4188                                  * others to this work.  Periodical cleaning
4189                                  * will eventually take the job as the last
4190                                  * resort.
4191                                  * We won't downgrade the lock, since other
4192                                  * rdatasets are probably stale, too.
4193                                  */
4194                                 locktype = isc_rwlocktype_write;
4195
4196                                 if (dns_rbtnode_refcurrent(node) == 0) {
4197                                         isc_mem_t *mctx;
4198
4199                                         /*
4200                                          * header->down can be non-NULL if the
4201                                          * refcount has just decremented to 0
4202                                          * but decrement_reference() has not
4203                                          * performed clean_cache_node(), in
4204                                          * which case we need to purge the
4205                                          * stale headers first.
4206                                          */
4207                                         mctx = search->rbtdb->common.mctx;
4208                                         clean_stale_headers(search->rbtdb,
4209                                                             mctx,
4210                                                             header);
4211                                         if (header_prev != NULL)
4212                                                 header_prev->next =
4213                                                         header->next;
4214                                         else
4215                                                 node->data = header->next;
4216                                         free_rdataset(search->rbtdb, mctx,
4217                                                       header);
4218                                 } else {
4219                                         header->attributes |=
4220                                                 RDATASET_ATTR_STALE;
4221                                         node->dirty = 1;
4222                                         header_prev = header;
4223                                 }
4224                         } else
4225                                 header_prev = header;
4226                 } else if (header->type == dns_rdatatype_dname &&
4227                            EXISTS(header)) {
4228                         dname_header = header;
4229                         header_prev = header;
4230                 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
4231                          EXISTS(header)) {
4232                         sigdname_header = header;
4233                         header_prev = header;
4234                 } else
4235                         header_prev = header;
4236         }
4237
4238         if (dname_header != NULL &&
4239             (!DNS_TRUST_PENDING(dname_header->trust) ||
4240              (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4241                 /*
4242                  * We increment the reference count on node to ensure that
4243                  * search->zonecut_rdataset will still be valid later.
4244                  */
4245                 new_reference(search->rbtdb, node);
4246                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4247                 search->zonecut = node;
4248                 search->zonecut_rdataset = dname_header;
4249                 search->zonecut_sigrdataset = sigdname_header;
4250                 search->need_cleanup = ISC_TRUE;
4251                 result = DNS_R_PARTIALMATCH;
4252         } else
4253                 result = DNS_R_CONTINUE;
4254
4255         NODE_UNLOCK(lock, locktype);
4256
4257         return (result);
4258 }
4259
4260 static inline isc_result_t
4261 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4262                      dns_dbnode_t **nodep, dns_name_t *foundname,
4263                      dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4264 {
4265         unsigned int i;
4266         dns_rbtnode_t *level_node;
4267         rdatasetheader_t *header, *header_prev, *header_next;
4268         rdatasetheader_t *found, *foundsig;
4269         isc_result_t result = ISC_R_NOTFOUND;
4270         dns_name_t name;
4271         dns_rbtdb_t *rbtdb;
4272         isc_boolean_t done;
4273         nodelock_t *lock;
4274         isc_rwlocktype_t locktype;
4275
4276         /*
4277          * Caller must be holding the tree lock.
4278          */
4279
4280         rbtdb = search->rbtdb;
4281         i = search->chain.level_matches;
4282         done = ISC_FALSE;
4283         do {
4284                 locktype = isc_rwlocktype_read;
4285                 lock = &rbtdb->node_locks[node->locknum].lock;
4286                 NODE_LOCK(lock, locktype);
4287
4288                 /*
4289                  * Look for NS and RRSIG NS rdatasets.
4290                  */
4291                 found = NULL;
4292                 foundsig = NULL;
4293                 header_prev = NULL;
4294                 for (header = node->data;
4295                      header != NULL;
4296                      header = header_next) {
4297                         header_next = header->next;
4298                         if (header->rdh_ttl <= search->now) {
4299                                 /*
4300                                  * This rdataset is stale.  If no one else is
4301                                  * using the node, we can clean it up right
4302                                  * now, otherwise we mark it as stale, and
4303                                  * the node as dirty, so it will get cleaned
4304                                  * up later.
4305                                  */
4306                                 if ((header->rdh_ttl <= search->now -
4307                                                     RBTDB_VIRTUAL) &&
4308                                     (locktype == isc_rwlocktype_write ||
4309                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4310                                         /*
4311                                          * We update the node's status only
4312                                          * when we can get write access.
4313                                          */
4314                                         locktype = isc_rwlocktype_write;
4315
4316                                         if (dns_rbtnode_refcurrent(node)
4317                                             == 0) {
4318                                                 isc_mem_t *m;
4319
4320                                                 m = search->rbtdb->common.mctx;
4321                                                 clean_stale_headers(
4322                                                         search->rbtdb,
4323                                                         m, header);
4324                                                 if (header_prev != NULL)
4325                                                         header_prev->next =
4326                                                                 header->next;
4327                                                 else
4328                                                         node->data =
4329                                                                 header->next;
4330                                                 free_rdataset(rbtdb, m,
4331                                                               header);
4332                                         } else {
4333                                                 header->attributes |=
4334                                                         RDATASET_ATTR_STALE;
4335                                                 node->dirty = 1;
4336                                                 header_prev = header;
4337                                         }
4338                                 } else
4339                                         header_prev = header;
4340                         } else if (EXISTS(header)) {
4341                                 /*
4342                                  * We've found an extant rdataset.  See if
4343                                  * we're interested in it.
4344                                  */
4345                                 if (header->type == dns_rdatatype_ns) {
4346                                         found = header;
4347                                         if (foundsig != NULL)
4348                                                 break;
4349                                 } else if (header->type ==
4350                                            RBTDB_RDATATYPE_SIGNS) {
4351                                         foundsig = header;
4352                                         if (found != NULL)
4353                                                 break;
4354                                 }
4355                                 header_prev = header;
4356                         } else
4357                                 header_prev = header;
4358                 }
4359
4360                 if (found != NULL) {
4361                         /*
4362                          * If we have to set foundname, we do it before
4363                          * anything else.  If we were to set foundname after
4364                          * we had set nodep or bound the rdataset, then we'd
4365                          * have to undo that work if dns_name_concatenate()
4366                          * failed.  By setting foundname first, there's
4367                          * nothing to undo if we have trouble.
4368                          */
4369                         if (foundname != NULL) {
4370                                 dns_name_init(&name, NULL);
4371                                 dns_rbt_namefromnode(node, &name);
4372                                 result = dns_name_copy(&name, foundname, NULL);
4373                                 while (result == ISC_R_SUCCESS && i > 0) {
4374                                         i--;
4375                                         level_node = search->chain.levels[i];
4376                                         dns_name_init(&name, NULL);
4377                                         dns_rbt_namefromnode(level_node,
4378                                                              &name);
4379                                         result =
4380                                                 dns_name_concatenate(foundname,
4381                                                                      &name,
4382                                                                      foundname,
4383                                                                      NULL);
4384                                 }
4385                                 if (result != ISC_R_SUCCESS) {
4386                                         *nodep = NULL;
4387                                         goto node_exit;
4388                                 }
4389                         }
4390                         result = DNS_R_DELEGATION;
4391                         if (nodep != NULL) {
4392                                 new_reference(search->rbtdb, node);
4393                                 *nodep = node;
4394                         }
4395                         bind_rdataset(search->rbtdb, node, found, search->now,
4396                                       rdataset);
4397                         if (foundsig != NULL)
4398                                 bind_rdataset(search->rbtdb, node, foundsig,
4399                                               search->now, sigrdataset);
4400                         if (need_headerupdate(found, search->now) ||
4401                             (foundsig != NULL &&
4402                              need_headerupdate(foundsig, search->now))) {
4403                                 if (locktype != isc_rwlocktype_write) {
4404                                         NODE_UNLOCK(lock, locktype);
4405                                         NODE_LOCK(lock, isc_rwlocktype_write);
4406                                         locktype = isc_rwlocktype_write;
4407                                 }
4408                                 if (need_headerupdate(found, search->now))
4409                                         update_header(search->rbtdb, found,
4410                                                       search->now);
4411                                 if (foundsig != NULL &&
4412                                     need_headerupdate(foundsig, search->now)) {
4413                                         update_header(search->rbtdb, foundsig,
4414                                                       search->now);
4415                                 }
4416                         }
4417                 }
4418
4419         node_exit:
4420                 NODE_UNLOCK(lock, locktype);
4421
4422                 if (found == NULL && i > 0) {
4423                         i--;
4424                         node = search->chain.levels[i];
4425                 } else
4426                         done = ISC_TRUE;
4427
4428         } while (!done);
4429
4430         return (result);
4431 }
4432
4433 static isc_result_t
4434 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4435                   isc_stdtime_t now, dns_name_t *foundname,
4436                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4437 {
4438         dns_rbtnode_t *node;
4439         rdatasetheader_t *header, *header_next, *header_prev;
4440         rdatasetheader_t *found, *foundsig;
4441         isc_boolean_t empty_node;
4442         isc_result_t result;
4443         dns_fixedname_t fname, forigin;
4444         dns_name_t *name, *origin;
4445         rbtdb_rdatatype_t matchtype, sigmatchtype;
4446         nodelock_t *lock;
4447         isc_rwlocktype_t locktype;
4448
4449         matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4450         sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4451                                              dns_rdatatype_nsec);
4452
4453         do {
4454                 node = NULL;
4455                 dns_fixedname_init(&fname);
4456                 name = dns_fixedname_name(&fname);
4457                 dns_fixedname_init(&forigin);
4458                 origin = dns_fixedname_name(&forigin);
4459                 result = dns_rbtnodechain_current(&search->chain, name,
4460                                                   origin, &node);
4461                 if (result != ISC_R_SUCCESS)
4462                         return (result);
4463                 locktype = isc_rwlocktype_read;
4464                 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4465                 NODE_LOCK(lock, locktype);
4466                 found = NULL;
4467                 foundsig = NULL;
4468                 empty_node = ISC_TRUE;
4469                 header_prev = NULL;
4470                 for (header = node->data;
4471                      header != NULL;
4472                      header = header_next) {
4473                         header_next = header->next;
4474                         if (header->rdh_ttl <= now) {
4475                                 /*
4476                                  * This rdataset is stale.  If no one else is
4477                                  * using the node, we can clean it up right
4478                                  * now, otherwise we mark it as stale, and the
4479                                  * node as dirty, so it will get cleaned up
4480                                  * later.
4481                                  */
4482                                 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4483                                     (locktype == isc_rwlocktype_write ||
4484                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4485                                         /*
4486                                          * We update the node's status only
4487                                          * when we can get write access.
4488                                          */
4489                                         locktype = isc_rwlocktype_write;
4490
4491                                         if (dns_rbtnode_refcurrent(node)
4492                                             == 0) {
4493                                                 isc_mem_t *m;
4494
4495                                                 m = search->rbtdb->common.mctx;
4496                                                 clean_stale_headers(
4497                                                         search->rbtdb,
4498                                                         m, header);
4499                                                 if (header_prev != NULL)
4500                                                         header_prev->next =
4501                                                                 header->next;
4502                                                 else
4503                                                         node->data = header->next;
4504                                                 free_rdataset(search->rbtdb, m,
4505                                                               header);
4506                                         } else {
4507                                                 header->attributes |=
4508                                                         RDATASET_ATTR_STALE;
4509                                                 node->dirty = 1;
4510                                                 header_prev = header;
4511                                         }
4512                                 } else
4513                                         header_prev = header;
4514                                 continue;
4515                         }
4516                         if (NONEXISTENT(header) ||
4517                             RBTDB_RDATATYPE_BASE(header->type) == 0) {
4518                                 header_prev = header;
4519                                 continue;
4520                         }
4521                         empty_node = ISC_FALSE;
4522                         if (header->type == matchtype)
4523                                 found = header;
4524                         else if (header->type == sigmatchtype)
4525                                 foundsig = header;
4526                         header_prev = header;
4527                 }
4528                 if (found != NULL) {
4529                         result = dns_name_concatenate(name, origin,
4530                                                       foundname, NULL);
4531                         if (result != ISC_R_SUCCESS)
4532                                 goto unlock_node;
4533                         bind_rdataset(search->rbtdb, node, found,
4534                                       now, rdataset);
4535                         if (foundsig != NULL)
4536                                 bind_rdataset(search->rbtdb, node, foundsig,
4537                                               now, sigrdataset);
4538                         new_reference(search->rbtdb, node);
4539                         *nodep = node;
4540                         result = DNS_R_COVERINGNSEC;
4541                 } else if (!empty_node) {
4542                         result = ISC_R_NOTFOUND;
4543                 } else
4544                         result = dns_rbtnodechain_prev(&search->chain, NULL,
4545                                                        NULL);
4546  unlock_node:
4547                 NODE_UNLOCK(lock, locktype);
4548         } while (empty_node && result == ISC_R_SUCCESS);
4549         return (result);
4550 }
4551
4552 /*
4553  * Mark a database for response policy rewriting.
4554  */
4555 #ifdef BIND9
4556 static void
4557 get_rpz_enabled(dns_db_t *db, dns_rpz_st_t *st)
4558 {
4559         dns_rbtdb_t *rbtdb;
4560
4561         rbtdb = (dns_rbtdb_t *)db;
4562         REQUIRE(VALID_RBTDB(rbtdb));
4563         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4564         dns_rpz_enabled(rbtdb->rpz_cidr, st);
4565         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4566 }
4567
4568 /*
4569  * Search the CDIR block tree of a response policy tree of trees for all of
4570  * the IP addresses in an A or AAAA rdataset.
4571  * Among the policies for all IPv4 and IPv6 addresses for a name, choose
4572  * the longest prefix.  Among those with the longest prefix, the first
4573  * configured policy.  Among answers for with the longest prefixes for
4574  * two or more IP addresses in the A and AAAA rdatasets the lexically
4575  * smallest address.
4576  */
4577 static isc_result_t
4578 rpz_findips(dns_rpz_zone_t *rpz, dns_rpz_type_t rpz_type,
4579             dns_zone_t *zone, dns_db_t *db, dns_dbversion_t *version,
4580             dns_rdataset_t *ardataset, dns_rpz_st_t *st)
4581 {
4582         dns_rbtdb_t *rbtdb;
4583         struct in_addr ina;
4584         struct in6_addr in6a;
4585         isc_netaddr_t netaddr;
4586         dns_fixedname_t selfnamef, qnamef;
4587         dns_name_t *selfname, *qname;
4588         dns_rbtnode_t *node;
4589         dns_rdataset_t zrdataset;
4590         dns_rpz_cidr_bits_t prefix;
4591         isc_result_t result;
4592         dns_rpz_policy_t rpz_policy;
4593         dns_ttl_t ttl;
4594
4595         rbtdb = (dns_rbtdb_t *)db;
4596         REQUIRE(VALID_RBTDB(rbtdb));
4597         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4598
4599         if (rbtdb->rpz_cidr == NULL) {
4600                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4601                 dns_db_detach(&db);
4602                 dns_zone_detach(&zone);
4603                 return (ISC_R_UNEXPECTED);
4604         }
4605
4606         dns_fixedname_init(&selfnamef);
4607         dns_fixedname_init(&qnamef);
4608         selfname = dns_fixedname_name(&selfnamef);
4609         qname = dns_fixedname_name(&qnamef);
4610
4611         for (result = dns_rdataset_first(ardataset);
4612              result == ISC_R_SUCCESS;
4613              result = dns_rdataset_next(ardataset)) {
4614                 dns_rdata_t rdata = DNS_RDATA_INIT;
4615                 dns_rdataset_current(ardataset, &rdata);
4616                 switch (rdata.type) {
4617                 case dns_rdatatype_a:
4618                         INSIST(rdata.length == 4);
4619                         memcpy(&ina.s_addr, rdata.data, 4);
4620                         isc_netaddr_fromin(&netaddr, &ina);
4621                         break;
4622                 case dns_rdatatype_aaaa:
4623                         INSIST(rdata.length == 16);
4624                         memcpy(in6a.s6_addr, rdata.data, 16);
4625                         isc_netaddr_fromin6(&netaddr, &in6a);
4626                         break;
4627                 default:
4628                         continue;
4629                 }
4630
4631                 result = dns_rpz_cidr_find(rbtdb->rpz_cidr, &netaddr, rpz_type,
4632                                            selfname, qname, &prefix);
4633                 if (result != ISC_R_SUCCESS)
4634                         continue;
4635
4636                 /*
4637                  * Choose the policy with the longest matching prefix.
4638                  * Between policies with the same prefix, choose the first
4639                  * configured.
4640                  */
4641                 if (st->m.policy != DNS_RPZ_POLICY_MISS) {
4642                         if (prefix < st->m.prefix)
4643                                 continue;
4644                         if (prefix == st->m.prefix &&
4645                             rpz->num > st->m.rpz->num)
4646                                 continue;
4647                 }
4648
4649                 /*
4650                  * We have rpz_st an entry with a prefix at least as long as
4651                  * the prefix of the entry we had before.  Find the node
4652                  * corresponding to CDIR tree entry.
4653                  */
4654                 node = NULL;
4655                 result = dns_rbt_findnode(rbtdb->tree, qname, NULL,
4656                                           &node, NULL, 0, NULL, NULL);
4657                 if (result != ISC_R_SUCCESS) {
4658                         char namebuf[DNS_NAME_FORMATSIZE];
4659
4660                         dns_name_format(qname, namebuf, sizeof(namebuf));
4661                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
4662                                       DNS_LOGMODULE_CACHE, DNS_RPZ_ERROR_LEVEL,
4663                                       "rpz_findips findnode(%s): %s",
4664                                       namebuf, isc_result_totext(result));
4665                         continue;
4666                 }
4667                 /*
4668                  * First look for a simple rewrite of the IP address.
4669                  * If that fails, look for a CNAME.  If we cannot find
4670                  * a CNAME or the CNAME is neither of the special forms
4671                  * "*" or ".", treat it like a real CNAME.
4672                  */
4673                 dns_rdataset_init(&zrdataset);
4674                 result = dns_db_findrdataset(db, node, version, ardataset->type,
4675                                              0, 0, &zrdataset, NULL);
4676                 if (result != ISC_R_SUCCESS)
4677                         result = dns_db_findrdataset(db, node, version,
4678                                                      dns_rdatatype_cname,
4679                                                      0, 0, &zrdataset, NULL);
4680                 if (result == ISC_R_SUCCESS) {
4681                         if (zrdataset.type != dns_rdatatype_cname) {
4682                                 rpz_policy = DNS_RPZ_POLICY_RECORD;
4683                         } else {
4684                                 rpz_policy = dns_rpz_decode_cname(&zrdataset,
4685                                                                   selfname);
4686                                 if (rpz_policy == DNS_RPZ_POLICY_RECORD)
4687                                         result = DNS_R_CNAME;
4688                         }
4689                         ttl = zrdataset.ttl;
4690                 } else {
4691                         rpz_policy = DNS_RPZ_POLICY_RECORD;
4692                         result = DNS_R_NXRRSET;
4693                         ttl = DNS_RPZ_TTL_DEFAULT;
4694                 }
4695
4696                 /*
4697                  * Use an overriding action specified in the configuration file
4698                  */
4699                 if (rpz->policy != DNS_RPZ_POLICY_GIVEN &&
4700                     rpz_policy != DNS_RPZ_POLICY_NO_OP)
4701                         rpz_policy = rpz->policy;
4702
4703                 /*
4704                  * We know the new prefix is at least as long as the current.
4705                  * Prefer the new answer if the new prefix is longer.
4706                  * Prefer the zone configured first if the prefixes are equal.
4707                  * With two actions from the same zone, prefer the action
4708                  * on the "smallest" name.
4709                  */
4710                 if (st->m.policy == DNS_RPZ_POLICY_MISS ||
4711                     prefix > st->m.prefix ||
4712                     rpz->num <= st->m.rpz->num ||
4713                     0 > dns_name_compare(qname, st->qname)) {
4714                         if (dns_rdataset_isassociated(st->m.rdataset))
4715                                 dns_rdataset_disassociate(st->m.rdataset);
4716                         if (st->m.node != NULL)
4717                                 dns_db_detachnode(st->m.db, &st->m.node);
4718                         if (st->m.db != NULL)
4719                                 dns_db_detach(&st->m.db);
4720                         if (st->m.zone != NULL)
4721                                 dns_zone_detach(&st->m.zone);
4722                         st->m.rpz = rpz;
4723                         st->m.type = rpz_type;
4724                         st->m.prefix = prefix;
4725                         st->m.policy = rpz_policy;
4726                         st->m.ttl = ttl;
4727                         st->m.result = result;
4728                         dns_name_copy(qname, st->qname, NULL);
4729                         if (rpz_policy == DNS_RPZ_POLICY_RECORD &&
4730                             result != DNS_R_NXRRSET) {
4731                                 dns_rdataset_clone(&zrdataset,st->m.rdataset);
4732                                 dns_db_attachnode(db, node, &st->m.node);
4733                         }
4734                         dns_db_attach(db, &st->m.db);
4735                         dns_zone_attach(zone, &st->m.zone);
4736                 }
4737                 if (dns_rdataset_isassociated(&zrdataset))
4738                         dns_rdataset_disassociate(&zrdataset);
4739         }
4740
4741         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4742         return (ISC_R_SUCCESS);
4743 }
4744 #endif
4745
4746 static isc_result_t
4747 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4748            dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4749            dns_dbnode_t **nodep, dns_name_t *foundname,
4750            dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4751 {
4752         dns_rbtnode_t *node = NULL;
4753         isc_result_t result;
4754         rbtdb_search_t search;
4755         isc_boolean_t cname_ok = ISC_TRUE;
4756         isc_boolean_t empty_node;
4757         nodelock_t *lock;
4758         isc_rwlocktype_t locktype;
4759         rdatasetheader_t *header, *header_prev, *header_next;
4760         rdatasetheader_t *found, *nsheader;
4761         rdatasetheader_t *foundsig, *nssig, *cnamesig;
4762         rdatasetheader_t *update, *updatesig;
4763         rbtdb_rdatatype_t sigtype, negtype;
4764
4765         UNUSED(version);
4766
4767         search.rbtdb = (dns_rbtdb_t *)db;
4768
4769         REQUIRE(VALID_RBTDB(search.rbtdb));
4770         REQUIRE(version == NULL);
4771
4772         if (now == 0)
4773                 isc_stdtime_get(&now);
4774
4775         search.rbtversion = NULL;
4776         search.serial = 1;
4777         search.options = options;
4778         search.copy_name = ISC_FALSE;
4779         search.need_cleanup = ISC_FALSE;
4780         search.wild = ISC_FALSE;
4781         search.zonecut = NULL;
4782         dns_fixedname_init(&search.zonecut_name);
4783         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4784         search.now = now;
4785         update = NULL;
4786         updatesig = NULL;
4787
4788         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4789
4790         /*
4791          * Search down from the root of the tree.  If, while going down, we
4792          * encounter a callback node, cache_zonecut_callback() will search the
4793          * rdatasets at the zone cut for a DNAME rdataset.
4794          */
4795         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4796                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
4797                                   cache_zonecut_callback, &search);
4798
4799         if (result == DNS_R_PARTIALMATCH) {
4800                 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4801                         result = find_coveringnsec(&search, nodep, now,
4802                                                    foundname, rdataset,
4803                                                    sigrdataset);
4804                         if (result == DNS_R_COVERINGNSEC)
4805                                 goto tree_exit;
4806                 }
4807                 if (search.zonecut != NULL) {
4808                     result = setup_delegation(&search, nodep, foundname,
4809                                               rdataset, sigrdataset);
4810                     goto tree_exit;
4811                 } else {
4812                 find_ns:
4813                         result = find_deepest_zonecut(&search, node, nodep,
4814                                                       foundname, rdataset,
4815                                                       sigrdataset);
4816                         goto tree_exit;
4817                 }
4818         } else if (result != ISC_R_SUCCESS)
4819                 goto tree_exit;
4820
4821         /*
4822          * Certain DNSSEC types are not subject to CNAME matching
4823          * (RFC4035, section 2.5 and RFC3007).
4824          *
4825          * We don't check for RRSIG, because we don't store RRSIG records
4826          * directly.
4827          */
4828         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4829                 cname_ok = ISC_FALSE;
4830
4831         /*
4832          * We now go looking for rdata...
4833          */
4834
4835         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4836         locktype = isc_rwlocktype_read;
4837         NODE_LOCK(lock, locktype);
4838
4839         found = NULL;
4840         foundsig = NULL;
4841         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4842         negtype = RBTDB_RDATATYPE_VALUE(0, type);
4843         nsheader = NULL;
4844         nssig = NULL;
4845         cnamesig = NULL;
4846         empty_node = ISC_TRUE;
4847         header_prev = NULL;
4848         for (header = node->data; header != NULL; header = header_next) {
4849                 header_next = header->next;
4850                 if (header->rdh_ttl <= now) {
4851                         /*
4852                          * This rdataset is stale.  If no one else is using the
4853                          * node, we can clean it up right now, otherwise we
4854                          * mark it as stale, and the node as dirty, so it will
4855                          * get cleaned up later.
4856                          */
4857                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4858                             (locktype == isc_rwlocktype_write ||
4859                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4860                                 /*
4861                                  * We update the node's status only when we
4862                                  * can get write access.
4863                                  */
4864                                 locktype = isc_rwlocktype_write;
4865
4866                                 if (dns_rbtnode_refcurrent(node) == 0) {
4867                                         isc_mem_t *mctx;
4868
4869                                         mctx = search.rbtdb->common.mctx;
4870                                         clean_stale_headers(search.rbtdb, mctx,
4871                                                             header);
4872                                         if (header_prev != NULL)
4873                                                 header_prev->next =
4874                                                         header->next;
4875                                         else
4876                                                 node->data = header->next;
4877                                         free_rdataset(search.rbtdb, mctx,
4878                                                       header);
4879                                 } else {
4880                                         header->attributes |=
4881                                                 RDATASET_ATTR_STALE;
4882                                         node->dirty = 1;
4883                                         header_prev = header;
4884                                 }
4885                         } else
4886                                 header_prev = header;
4887                 } else if (EXISTS(header)) {
4888                         /*
4889                          * We now know that there is at least one active
4890                          * non-stale rdataset at this node.
4891                          */
4892                         empty_node = ISC_FALSE;
4893
4894                         /*
4895                          * If we found a type we were looking for, remember
4896                          * it.
4897                          */
4898                         if (header->type == type ||
4899                             (type == dns_rdatatype_any &&
4900                              RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4901                             (cname_ok && header->type ==
4902                              dns_rdatatype_cname)) {
4903                                 /*
4904                                  * We've found the answer.
4905                                  */
4906                                 found = header;
4907                                 if (header->type == dns_rdatatype_cname &&
4908                                     cname_ok &&
4909                                     cnamesig != NULL) {
4910                                         /*
4911                                          * If we've already got the CNAME RRSIG,
4912                                          * use it, otherwise change sigtype
4913                                          * so that we find it.
4914                                          */
4915                                         if (cnamesig != NULL)
4916                                                 foundsig = cnamesig;
4917                                         else
4918                                                 sigtype =
4919                                                     RBTDB_RDATATYPE_SIGCNAME;
4920                                         foundsig = cnamesig;
4921                                 }
4922                         } else if (header->type == sigtype) {
4923                                 /*
4924                                  * We've found the RRSIG rdataset for our
4925                                  * target type.  Remember it.
4926                                  */
4927                                 foundsig = header;
4928                         } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4929                                    header->type == negtype) {
4930                                 /*
4931                                  * We've found a negative cache entry.
4932                                  */
4933                                 found = header;
4934                         } else if (header->type == dns_rdatatype_ns) {
4935                                 /*
4936                                  * Remember a NS rdataset even if we're
4937                                  * not specifically looking for it, because
4938                                  * we might need it later.
4939                                  */
4940                                 nsheader = header;
4941                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4942                                 /*
4943                                  * If we need the NS rdataset, we'll also
4944                                  * need its signature.
4945                                  */
4946                                 nssig = header;
4947                         } else if (cname_ok &&
4948                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
4949                                 /*
4950                                  * If we get a CNAME match, we'll also need
4951                                  * its signature.
4952                                  */
4953                                 cnamesig = header;
4954                         }
4955                         header_prev = header;
4956                 } else
4957                         header_prev = header;
4958         }
4959
4960         if (empty_node) {
4961                 /*
4962                  * We have an exact match for the name, but there are no
4963                  * extant rdatasets.  That means that this node doesn't
4964                  * meaningfully exist, and that we really have a partial match.
4965                  */
4966                 NODE_UNLOCK(lock, locktype);
4967                 goto find_ns;
4968         }
4969
4970         /*
4971          * If we didn't find what we were looking for...
4972          */
4973         if (found == NULL ||
4974             (DNS_TRUST_ADDITIONAL(found->trust) &&
4975              ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4976             (found->trust == dns_trust_glue &&
4977              ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4978             (DNS_TRUST_PENDING(found->trust) &&
4979              ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4980                 /*
4981                  * If there is an NS rdataset at this node, then this is the
4982                  * deepest zone cut.
4983                  */
4984                 if (nsheader != NULL) {
4985                         if (nodep != NULL) {
4986                                 new_reference(search.rbtdb, node);
4987                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4988                                 *nodep = node;
4989                         }
4990                         bind_rdataset(search.rbtdb, node, nsheader, search.now,
4991                                       rdataset);
4992                         if (need_headerupdate(nsheader, search.now))
4993                                 update = nsheader;
4994                         if (nssig != NULL) {
4995                                 bind_rdataset(search.rbtdb, node, nssig,
4996                                               search.now, sigrdataset);
4997                                 if (need_headerupdate(nssig, search.now))
4998                                         updatesig = nssig;
4999                         }
5000                         result = DNS_R_DELEGATION;
5001                         goto node_exit;
5002                 }
5003
5004                 /*
5005                  * Go find the deepest zone cut.
5006                  */
5007                 NODE_UNLOCK(lock, locktype);
5008                 goto find_ns;
5009         }
5010
5011         /*
5012          * We found what we were looking for, or we found a CNAME.
5013          */
5014
5015         if (nodep != NULL) {
5016                 new_reference(search.rbtdb, node);
5017                 INSIST(!ISC_LINK_LINKED(node, deadlink));
5018                 *nodep = node;
5019         }
5020
5021         if (NEGATIVE(found)) {
5022                 /*
5023                  * We found a negative cache entry.
5024                  */
5025                 if (NXDOMAIN(found))
5026                         result = DNS_R_NCACHENXDOMAIN;
5027                 else
5028                         result = DNS_R_NCACHENXRRSET;
5029         } else if (type != found->type &&
5030                    type != dns_rdatatype_any &&
5031                    found->type == dns_rdatatype_cname) {
5032                 /*
5033                  * We weren't doing an ANY query and we found a CNAME instead
5034                  * of the type we were looking for, so we need to indicate
5035                  * that result to the caller.
5036                  */
5037                 result = DNS_R_CNAME;
5038         } else {
5039                 /*
5040                  * An ordinary successful query!
5041                  */
5042                 result = ISC_R_SUCCESS;
5043         }
5044
5045         if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
5046             result == DNS_R_NCACHENXRRSET) {
5047                 bind_rdataset(search.rbtdb, node, found, search.now,
5048                               rdataset);
5049                 if (need_headerupdate(found, search.now))
5050                         update = found;
5051                 if (foundsig != NULL) {
5052                         bind_rdataset(search.rbtdb, node, foundsig, search.now,
5053                                       sigrdataset);
5054                         if (need_headerupdate(foundsig, search.now))
5055                                 updatesig = foundsig;
5056                 }
5057         }
5058
5059  node_exit:
5060         if ((update != NULL || updatesig != NULL) &&
5061             locktype != isc_rwlocktype_write) {
5062                 NODE_UNLOCK(lock, locktype);
5063                 NODE_LOCK(lock, isc_rwlocktype_write);
5064                 locktype = isc_rwlocktype_write;
5065         }
5066         if (update != NULL && need_headerupdate(update, search.now))
5067                 update_header(search.rbtdb, update, search.now);
5068         if (updatesig != NULL && need_headerupdate(updatesig, search.now))
5069                 update_header(search.rbtdb, updatesig, search.now);
5070
5071         NODE_UNLOCK(lock, locktype);
5072
5073  tree_exit:
5074         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5075
5076         /*
5077          * If we found a zonecut but aren't going to use it, we have to
5078          * let go of it.
5079          */
5080         if (search.need_cleanup) {
5081                 node = search.zonecut;
5082                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
5083
5084                 NODE_LOCK(lock, isc_rwlocktype_read);
5085                 decrement_reference(search.rbtdb, node, 0,
5086                                     isc_rwlocktype_read, isc_rwlocktype_none,
5087                                     ISC_FALSE);
5088                 NODE_UNLOCK(lock, isc_rwlocktype_read);
5089         }
5090
5091         dns_rbtnodechain_reset(&search.chain);
5092
5093         return (result);
5094 }
5095
/*
 * cache_findzonecut: find the NS rdataset (and its signature, if any) that
 * marks the zone cut for 'name' in a cache database.
 *
 *  - 'now' == 0 means "use the current time" (isc_stdtime_get()).
 *  - The tree lock is held for reading for the whole search.
 *  - 'name' is looked up with dns_rbt_findnode() using
 *    DNS_RBTFIND_EMPTYDATA, plus DNS_RBTFIND_NOEXACT when
 *    DNS_DBFIND_NOEXACT is set in 'options'.
 *  - On DNS_R_PARTIALMATCH, or when the exactly matching node holds no
 *    unexpired, existing NS rdataset, the result comes from
 *    find_deepest_zonecut().
 *  - While scanning the matching node, headers whose TTL is at or before
 *    'now' are never returned; those at or before 'now - RBTDB_VIRTUAL'
 *    are additionally freed (node unreferenced) or flagged
 *    RDATASET_ATTR_STALE (node referenced), but only if the node lock is,
 *    or can be upgraded to, a write lock.
 *  - On success the NS rdataset is bound to 'rdataset', its signature (if
 *    found) to 'sigrdataset', and, when 'nodep' is non-NULL, a new
 *    reference to the node is stored in '*nodep'.
 *  - DNS_R_DELEGATION is reported to the caller as ISC_R_SUCCESS.
 */
5096 static isc_result_t
5097 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
5098                   isc_stdtime_t now, dns_dbnode_t **nodep,
5099                   dns_name_t *foundname,
5100                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
5101 {
5102         dns_rbtnode_t *node = NULL;
5103         nodelock_t *lock;
5104         isc_result_t result;
5105         rbtdb_search_t search;
5106         rdatasetheader_t *header, *header_prev, *header_next;
5107         rdatasetheader_t *found, *foundsig;
5108         unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
5109         isc_rwlocktype_t locktype;
5110
5111         search.rbtdb = (dns_rbtdb_t *)db;
5112
5113         REQUIRE(VALID_RBTDB(search.rbtdb));
5114
5115         if (now == 0)
5116                 isc_stdtime_get(&now);
5117
             /* Set up the search context that is handed to the tree lookup. */
5118         search.rbtversion = NULL;
5119         search.serial = 1;
5120         search.options = options;
5121         search.copy_name = ISC_FALSE;
5122         search.need_cleanup = ISC_FALSE;
5123         search.wild = ISC_FALSE;
5124         search.zonecut = NULL;
5125         dns_fixedname_init(&search.zonecut_name);
5126         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
5127         search.now = now;
5128
5129         if ((options & DNS_DBFIND_NOEXACT) != 0)
5130                 rbtoptions |= DNS_RBTFIND_NOEXACT;
5131
5132         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5133
5134         /*
5135          * Search down from the root of the tree.
5136          */
5137         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
5138                                   &search.chain, rbtoptions, NULL, &search);
5139
5140         if (result == DNS_R_PARTIALMATCH) {
                     /*
                      * This label is also the target of the 'goto find_ns'
                      * further down, taken (after releasing the node lock)
                      * when the exactly matching node has no usable NS
                      * rdataset.
                      */
5141         find_ns:
5142                 result = find_deepest_zonecut(&search, node, nodep, foundname,
5143                                               rdataset, sigrdataset);
5144                 goto tree_exit;
5145         } else if (result != ISC_R_SUCCESS)
5146                 goto tree_exit;
5147
5148         /*
5149          * We now go looking for an NS rdataset at the node.
5150          */
5151
5152         lock = &(search.rbtdb->node_locks[node->locknum].lock);
5153         locktype = isc_rwlocktype_read;
5154         NODE_LOCK(lock, locktype);
5155
5156         found = NULL;
5157         foundsig = NULL;
5158         header_prev = NULL;
5159         for (header = node->data; header != NULL; header = header_next) {
                     /*
                      * Remember the successor before touching 'header': the
                      * stale-cleanup path below may free it.
                      */
5160                 header_next = header->next;
5161                 if (header->rdh_ttl <= now) {
5162                         /*
5163                          * This rdataset is stale.  If no one else is using the
5164                          * node, we can clean it up right now, otherwise we
5165                          * mark it as stale, and the node as dirty, so it will
5166                          * get cleaned up later.
5167                          */
5168                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5169                             (locktype == isc_rwlocktype_write ||
5170                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5171                                 /*
5172                                  * We update the node's status only when we
5173                                  * can get write access.
5174                                  */
5175                                 locktype = isc_rwlocktype_write;
5176
5177                                 if (dns_rbtnode_refcurrent(node) == 0) {
5178                                         isc_mem_t *mctx;
5179
                                             /*
                                              * Unlink 'header' from the node's
                                              * list ('header_prev' is left
                                              * unchanged) and release it.
                                              */
5180                                         mctx = search.rbtdb->common.mctx;
5181                                         clean_stale_headers(search.rbtdb, mctx,
5182                                                             header);
5183                                         if (header_prev != NULL)
5184                                                 header_prev->next =
5185                                                         header->next;
5186                                         else
5187                                                 node->data = header->next;
5188                                         free_rdataset(search.rbtdb, mctx,
5189                                                       header);
5190                                 } else {
5191                                         header->attributes |=
5192                                                 RDATASET_ATTR_STALE;
5193                                         node->dirty = 1;
5194                                         header_prev = header;
5195                                 }
5196                         } else
5197                                 header_prev = header;
5198                 } else if (EXISTS(header)) {
5199                         /*
5200                          * An unexpired, existing rdataset.  Only NS and its
5201                          * signature are of interest in this function.
5202                          */
5203                         if (header->type == dns_rdatatype_ns) {
5204                                 /*
5205                                  * This is the NS rdataset we are looking
5206                                  * for; it is bound to 'rdataset' once the
5207                                  * scan of the node is complete.
5208                                  */
5209                                 found = header;
5210                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
5211                                 /*
5212                                  * The signature covering NS; bound to
5213                                  * 'sigrdataset' if an NS rdataset is found.
5214                                  */
5215                                 foundsig = header;
5216                         }
5217                         header_prev = header;
5218                 } else
5219                         header_prev = header;
5220         }
5221
5222         if (found == NULL) {
5223                 /*
5224                  * No NS records here.
5225                  */
5226                 NODE_UNLOCK(lock, locktype);
5227                 goto find_ns;
5228         }
5229
5230         if (nodep != NULL) {
5231                 new_reference(search.rbtdb, node);
5232                 INSIST(!ISC_LINK_LINKED(node, deadlink));
5233                 *nodep = node;
5234         }
5235
5236         bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
5237         if (foundsig != NULL)
5238                 bind_rdataset(search.rbtdb, node, foundsig, search.now,
5239                               sigrdataset);
5240
5241         if (need_headerupdate(found, search.now) ||
5242             (foundsig != NULL &&  need_headerupdate(foundsig, search.now))) {
                     /*
                      * update_header() is only called with the write lock held.
                      * The read lock is dropped and the write lock taken
                      * separately (not an atomic upgrade), and
                      * need_headerupdate() is evaluated again afterwards --
                      * presumably because another thread may have updated
                      * the header in the gap.
                      */
5243                 if (locktype != isc_rwlocktype_write) {
5244                         NODE_UNLOCK(lock, locktype);
5245                         NODE_LOCK(lock, isc_rwlocktype_write);
5246                         locktype = isc_rwlocktype_write;
5247                 }
5248                 if (need_headerupdate(found, search.now))
5249                         update_header(search.rbtdb, found, search.now);
5250                 if (foundsig != NULL &&
5251                     need_headerupdate(foundsig, search.now)) {
5252                         update_header(search.rbtdb, foundsig, search.now);
5253                 }
5254         }
5255
5256         NODE_UNLOCK(lock, locktype);
5257
5258  tree_exit:
5259         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5260
5261         INSIST(!search.need_cleanup);
5262
5263         dns_rbtnodechain_reset(&search.chain);
5264
             /* Callers of this function see a found delegation as plain success. */
5265         if (result == DNS_R_DELEGATION)
5266                 result = ISC_R_SUCCESS;
5267
5268         return (result);
5269 }
5270
5271 static void
5272 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
5273         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5274         dns_rbtnode_t *node = (dns_rbtnode_t *)source;
5275         unsigned int refs;
5276
5277         REQUIRE(VALID_RBTDB(rbtdb));
5278         REQUIRE(targetp != NULL && *targetp == NULL);
5279
5280         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
5281         dns_rbtnode_refincrement(node, &refs);
5282         INSIST(refs != 0);
5283         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
5284
5285         *targetp = source;
5286 }
5287
5288 static void
5289 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
5290         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5291         dns_rbtnode_t *node;
5292         isc_boolean_t want_free = ISC_FALSE;
5293         isc_boolean_t inactive = ISC_FALSE;
5294         rbtdb_nodelock_t *nodelock;
5295
5296         REQUIRE(VALID_RBTDB(rbtdb));
5297         REQUIRE(targetp != NULL && *targetp != NULL);
5298
5299         node = (dns_rbtnode_t *)(*targetp);
5300         nodelock = &rbtdb->node_locks[node->locknum];
5301
5302         NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
5303
5304         if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
5305                                 isc_rwlocktype_none, ISC_FALSE)) {
5306                 if (isc_refcount_current(&nodelock->references) == 0 &&
5307                     nodelock->exiting) {
5308                         inactive = ISC_TRUE;
5309                 }
5310         }
5311
5312         NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
5313
5314         *targetp = NULL;
5315
5316         if (inactive) {
5317                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5318                 rbtdb->active--;
5319                 if (rbtdb->active == 0)
5320                         want_free = ISC_TRUE;
5321                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
5322                 if (want_free) {
5323                         char buf[DNS_NAME_FORMATSIZE];
5324                         if (dns_name_dynamic(&rbtdb->common.origin))
5325                                 dns_name_format(&rbtdb->common.origin, buf,
5326                                                 sizeof(buf));
5327                         else
5328                                 strcpy(buf, "<UNKNOWN>");
5329                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
5330                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
5331                                       "calling free_rbtdb(%s)", buf);
5332                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
5333                 }
5334         }
5335 }
5336
5337 static isc_result_t
5338 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
5339         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5340         dns_rbtnode_t *rbtnode = node;
5341         rdatasetheader_t *header;
5342         isc_boolean_t force_expire = ISC_FALSE;
5343         /*
5344          * These are the category and module used by the cache cleaner.
5345          */
5346         isc_boolean_t log = ISC_FALSE;
5347         isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
5348         isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
5349         int level = ISC_LOG_DEBUG(2);
5350         char printname[DNS_NAME_FORMATSIZE];
5351
5352         REQUIRE(VALID_RBTDB(rbtdb));
5353
5354         /*
5355          * Caller must hold a tree lock.
5356          */
5357
5358         if (now == 0)
5359                 isc_stdtime_get(&now);
5360
5361         if (isc_mem_isovermem(rbtdb->common.mctx)) {
5362                 isc_uint32_t val;
5363
5364                 isc_random_get(&val);
5365                 /*
5366                  * XXXDCL Could stand to have a better policy, like LRU.
5367                  */
5368                 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
5369
5370                 /*
5371                  * Note that 'log' can be true IFF overmem is also true.
5372                  * overmem can currently only be true for cache
5373                  * databases -- hence all of the "overmem cache" log strings.
5374                  */
5375                 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
5376                 if (log)
5377                         isc_log_write(dns_lctx, category, module, level,
5378                                       "overmem cache: %s %s",
5379                                       force_expire ? "FORCE" : "check",
5380                                       dns_rbt_formatnodename(rbtnode,
5381                                                            printname,
5382                                                            sizeof(printname)));
5383         }
5384
5385         /*
5386          * We may not need write access, but this code path is not performance
5387          * sensitive, so it should be okay to always lock as a writer.
5388          */
5389         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5390                   isc_rwlocktype_write);
5391
5392         for (header = rbtnode->data; header != NULL; header = header->next)
5393                 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
5394                         /*
5395                          * We don't check if refcurrent(rbtnode) == 0 and try
5396                          * to free like we do in cache_find(), because
5397                          * refcurrent(rbtnode) must be non-zero.  This is so
5398                          * because 'node' is an argument to the function.
5399                          */
5400                         header->attributes |= RDATASET_ATTR_STALE;
5401                         rbtnode->dirty = 1;
5402                         if (log)
5403                                 isc_log_write(dns_lctx, category, module,
5404                                               level, "overmem cache: stale %s",
5405                                               printname);
5406                 } else if (force_expire) {
5407                         if (! RETAIN(header)) {
5408                                 set_ttl(rbtdb, header, 0);
5409                                 header->attributes |= RDATASET_ATTR_STALE;
5410                                 rbtnode->dirty = 1;
5411                         } else if (log) {
5412                                 isc_log_write(dns_lctx, category, module,
5413                                               level, "overmem cache: "
5414                                               "reprieve by RETAIN() %s",
5415                                               printname);
5416                         }
5417                 } else if (isc_mem_isovermem(rbtdb->common.mctx) && log)
5418                         isc_log_write(dns_lctx, category, module, level,
5419                                       "overmem cache: saved %s", printname);
5420
5421         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5422                     isc_rwlocktype_write);
5423
5424         return (ISC_R_SUCCESS);
5425 }
5426
5427 static void
5428 overmem(dns_db_t *db, isc_boolean_t overmem) {
5429         /* This is an empty callback.  See adb.c:water() */
5430
5431         UNUSED(db);
5432         UNUSED(overmem);
5433
5434         return;
5435 }
5436
5437 static void
5438 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5439         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5440         dns_rbtnode_t *rbtnode = node;
5441         isc_boolean_t first;
5442
5443         REQUIRE(VALID_RBTDB(rbtdb));
5444
5445         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5446                   isc_rwlocktype_read);
5447
5448         fprintf(out, "node %p, %u references, locknum = %u\n",
5449                 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5450                 rbtnode->locknum);
5451         if (rbtnode->data != NULL) {
5452                 rdatasetheader_t *current, *top_next;
5453
5454                 for (current = rbtnode->data; current != NULL;
5455                      current = top_next) {
5456                         top_next = current->next;
5457                         first = ISC_TRUE;
5458                         fprintf(out, "\ttype %u", current->type);
5459                         do {
5460                                 if (!first)
5461                                         fprintf(out, "\t");
5462                                 first = ISC_FALSE;
5463                                 fprintf(out,
5464                                         "\tserial = %lu, ttl = %u, "
5465                                         "trust = %u, attributes = %u, "
5466                                         "resign = %u\n",
5467                                         (unsigned long)current->serial,
5468                                         current->rdh_ttl,
5469                                         current->trust,
5470                                         current->attributes,
5471                                         current->resign);
5472                                 current = current->down;
5473                         } while (current != NULL);
5474                 }
5475         } else
5476                 fprintf(out, "(empty)\n");
5477
5478         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5479                     isc_rwlocktype_read);
5480 }
5481
5482 static isc_result_t
5483 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5484 {
5485         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5486         rbtdb_dbiterator_t *rbtdbiter;
5487
5488         REQUIRE(VALID_RBTDB(rbtdb));
5489
5490         rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5491         if (rbtdbiter == NULL)
5492                 return (ISC_R_NOMEMORY);
5493
5494         rbtdbiter->common.methods = &dbiterator_methods;
5495         rbtdbiter->common.db = NULL;
5496         dns_db_attach(db, &rbtdbiter->common.db);
5497         rbtdbiter->common.relative_names =
5498                         ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5499         rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5500         rbtdbiter->common.cleaning = ISC_FALSE;
5501         rbtdbiter->paused = ISC_TRUE;
5502         rbtdbiter->tree_locked = isc_rwlocktype_none;
5503         rbtdbiter->result = ISC_R_SUCCESS;
5504         dns_fixedname_init(&rbtdbiter->name);
5505         dns_fixedname_init(&rbtdbiter->origin);
5506         rbtdbiter->node = NULL;
5507         rbtdbiter->delete = 0;
5508         rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5509         rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5510         memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5511         dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5512         dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5513         if (rbtdbiter->nsec3only)
5514                 rbtdbiter->current = &rbtdbiter->nsec3chain;
5515         else
5516                 rbtdbiter->current = &rbtdbiter->chain;
5517
5518         *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5519
5520         return (ISC_R_SUCCESS);
5521 }
5522
5523 static isc_result_t
5524 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5525                   dns_rdatatype_t type, dns_rdatatype_t covers,
5526                   isc_stdtime_t now, dns_rdataset_t *rdataset,
5527                   dns_rdataset_t *sigrdataset)
5528 {
5529         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5530         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5531         rdatasetheader_t *header, *header_next, *found, *foundsig;
5532         rbtdb_serial_t serial;
5533         rbtdb_version_t *rbtversion = version;
5534         isc_boolean_t close_version = ISC_FALSE;
5535         rbtdb_rdatatype_t matchtype, sigmatchtype;
5536
5537         REQUIRE(VALID_RBTDB(rbtdb));
5538         REQUIRE(type != dns_rdatatype_any);
5539
5540         if (rbtversion == NULL) {
5541                 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5542                 close_version = ISC_TRUE;
5543         }
5544         serial = rbtversion->serial;
5545         now = 0;
5546
5547         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5548                   isc_rwlocktype_read);
5549
5550         found = NULL;
5551         foundsig = NULL;
5552         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5553         if (covers == 0)
5554                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5555         else
5556                 sigmatchtype = 0;
5557
5558         for (header = rbtnode->data; header != NULL; header = header_next) {
5559                 header_next = header->next;
5560                 do {
5561                         if (header->serial <= serial &&
5562                             !IGNORE(header)) {
5563                                 /*
5564                                  * Is this a "this rdataset doesn't
5565                                  * exist" record?
5566                                  */
5567                                 if (NONEXISTENT(header))
5568                                         header = NULL;
5569                                 break;
5570                         } else
5571                                 header = header->down;
5572                 } while (header != NULL);
5573                 if (header != NULL) {
5574                         /*
5575                          * We have an active, extant rdataset.  If it's a
5576                          * type we're looking for, remember it.
5577                          */
5578                         if (header->type == matchtype) {
5579                                 found = header;
5580                                 if (foundsig != NULL)
5581                                         break;
5582                         } else if (header->type == sigmatchtype) {
5583                                 foundsig = header;
5584                                 if (found != NULL)
5585                                         break;
5586                         }
5587                 }
5588         }
5589         if (found != NULL) {
5590                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5591                 if (foundsig != NULL)
5592                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5593                                       sigrdataset);
5594         }
5595
5596         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5597                     isc_rwlocktype_read);
5598
5599         if (close_version)
5600                 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5601                              ISC_FALSE);
5602
5603         if (found == NULL)
5604                 return (ISC_R_NOTFOUND);
5605
5606         return (ISC_R_SUCCESS);
5607 }
5608
5609 static isc_result_t
5610 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5611                    dns_rdatatype_t type, dns_rdatatype_t covers,
5612                    isc_stdtime_t now, dns_rdataset_t *rdataset,
5613                    dns_rdataset_t *sigrdataset)
5614 {
5615         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5616         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5617         rdatasetheader_t *header, *header_next, *found, *foundsig;
5618         rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5619         isc_result_t result;
5620         nodelock_t *lock;
5621         isc_rwlocktype_t locktype;
5622
5623         REQUIRE(VALID_RBTDB(rbtdb));
5624         REQUIRE(type != dns_rdatatype_any);
5625
5626         UNUSED(version);
5627
5628         result = ISC_R_SUCCESS;
5629
5630         if (now == 0)
5631                 isc_stdtime_get(&now);
5632
5633         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5634         locktype = isc_rwlocktype_read;
5635         NODE_LOCK(lock, locktype);
5636
5637         found = NULL;
5638         foundsig = NULL;
5639         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5640         negtype = RBTDB_RDATATYPE_VALUE(0, type);
5641         if (covers == 0)
5642                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5643         else
5644                 sigmatchtype = 0;
5645
5646         for (header = rbtnode->data; header != NULL; header = header_next) {
5647                 header_next = header->next;
5648                 if (header->rdh_ttl <= now) {
5649                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5650                             (locktype == isc_rwlocktype_write ||
5651                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5652                                 /*
5653                                  * We update the node's status only when we
5654                                  * can get write access.
5655                                  */
5656                                 locktype = isc_rwlocktype_write;
5657
5658                                 /*
5659                                  * We don't check if refcurrent(rbtnode) == 0
5660                                  * and try to free like we do in cache_find(),
5661                                  * because refcurrent(rbtnode) must be
5662                                  * non-zero.  This is so because 'node' is an
5663                                  * argument to the function.
5664                                  */
5665                                 header->attributes |= RDATASET_ATTR_STALE;
5666                                 rbtnode->dirty = 1;
5667                         }
5668                 } else if (EXISTS(header)) {
5669                         if (header->type == matchtype)
5670                                 found = header;
5671                         else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5672                                  header->type == negtype)
5673                                 found = header;
5674                         else if (header->type == sigmatchtype)
5675                                 foundsig = header;
5676                 }
5677         }
5678         if (found != NULL) {
5679                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5680                 if (foundsig != NULL)
5681                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5682                                       sigrdataset);
5683         }
5684
5685         NODE_UNLOCK(lock, locktype);
5686
5687         if (found == NULL)
5688                 return (ISC_R_NOTFOUND);
5689
5690         if (NEGATIVE(found)) {
5691                 /*
5692                  * We found a negative cache entry.
5693                  */
5694                 if (NXDOMAIN(found))
5695                         result = DNS_R_NCACHENXDOMAIN;
5696                 else
5697                         result = DNS_R_NCACHENXRRSET;
5698         }
5699
5700         return (result);
5701 }
5702
5703 static isc_result_t
5704 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5705              isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5706 {
5707         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5708         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5709         rbtdb_version_t *rbtversion = version;
5710         rbtdb_rdatasetiter_t *iterator;
5711         unsigned int refs;
5712
5713         REQUIRE(VALID_RBTDB(rbtdb));
5714
5715         iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5716         if (iterator == NULL)
5717                 return (ISC_R_NOMEMORY);
5718
5719         if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5720                 now = 0;
5721                 if (rbtversion == NULL)
5722                         currentversion(db,
5723                                  (dns_dbversion_t **) (void *)(&rbtversion));
5724                 else {
5725                         unsigned int refs;
5726
5727                         isc_refcount_increment(&rbtversion->references,
5728                                                &refs);
5729                         INSIST(refs > 1);
5730                 }
5731         } else {
5732                 if (now == 0)
5733                         isc_stdtime_get(&now);
5734                 rbtversion = NULL;
5735         }
5736
5737         iterator->common.magic = DNS_RDATASETITER_MAGIC;
5738         iterator->common.methods = &rdatasetiter_methods;
5739         iterator->common.db = db;
5740         iterator->common.node = node;
5741         iterator->common.version = (dns_dbversion_t *)rbtversion;
5742         iterator->common.now = now;
5743
5744         NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5745
5746         dns_rbtnode_refincrement(rbtnode, &refs);
5747         INSIST(refs != 0);
5748
5749         iterator->current = NULL;
5750
5751         NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5752
5753         *iteratorp = (dns_rdatasetiter_t *)iterator;
5754
5755         return (ISC_R_SUCCESS);
5756 }
5757
5758 static isc_boolean_t
5759 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5760         rdatasetheader_t *header, *header_next;
5761         isc_boolean_t cname, other_data;
5762         dns_rdatatype_t rdtype;
5763
5764         /*
5765          * The caller must hold the node lock.
5766          */
5767
5768         /*
5769          * Look for CNAME and "other data" rdatasets active in our version.
5770          */
5771         cname = ISC_FALSE;
5772         other_data = ISC_FALSE;
5773         for (header = node->data; header != NULL; header = header_next) {
5774                 header_next = header->next;
5775                 if (header->type == dns_rdatatype_cname) {
5776                         /*
5777                          * Look for an active extant CNAME.
5778                          */
5779                         do {
5780                                 if (header->serial <= serial &&
5781                                     !IGNORE(header)) {
5782                                         /*
5783                                          * Is this a "this rdataset doesn't
5784                                          * exist" record?
5785                                          */
5786                                         if (NONEXISTENT(header))
5787                                                 header = NULL;
5788                                         break;
5789                                 } else
5790                                         header = header->down;
5791                         } while (header != NULL);
5792                         if (header != NULL)
5793                                 cname = ISC_TRUE;
5794                 } else {
5795                         /*
5796                          * Look for active extant "other data".
5797                          *
5798                          * "Other data" is any rdataset whose type is not
5799                          * KEY, NSEC, SIG or RRSIG.
5800                          */
5801                         rdtype = RBTDB_RDATATYPE_BASE(header->type);
5802                         if (rdtype != dns_rdatatype_key &&
5803                             rdtype != dns_rdatatype_sig &&
5804                             rdtype != dns_rdatatype_nsec &&
5805                             rdtype != dns_rdatatype_rrsig) {
5806                                 /*
5807                                  * Is it active and extant?
5808                                  */
5809                                 do {
5810                                         if (header->serial <= serial &&
5811                                             !IGNORE(header)) {
5812                                                 /*
5813                                                  * Is this a "this rdataset
5814                                                  * doesn't exist" record?
5815                                                  */
5816                                                 if (NONEXISTENT(header))
5817                                                         header = NULL;
5818                                                 break;
5819                                         } else
5820                                                 header = header->down;
5821                                 } while (header != NULL);
5822                                 if (header != NULL)
5823                                         other_data = ISC_TRUE;
5824                         }
5825                 }
5826         }
5827
5828         if (cname && other_data)
5829                 return (ISC_TRUE);
5830
5831         return (ISC_FALSE);
5832 }
5833
5834 static isc_result_t
5835 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5836         isc_result_t result;
5837
5838         INSIST(!IS_CACHE(rbtdb));
5839         INSIST(newheader->heap_index == 0);
5840         INSIST(!ISC_LINK_LINKED(newheader, link));
5841
5842         result = isc_heap_insert(rbtdb->heaps[idx], newheader);
5843         return (result);
5844 }
5845
/*
 * Link 'newheader' into the rdataset list of 'rbtnode'.
 *
 * The caller must hold the node lock.
 *
 * 'rbtversion' is the version being modified.  When it is NULL the data is
 * treated as cache data: negative cache entries are reconciled with
 * positive ones, and trust levels and TTLs (relative to 'now') decide
 * whether existing data is replaced.
 *
 * 'options' bits examined here:
 *      DNS_DBADD_MERGE         merge with an existing rdataset of the same
 *                              type (requires 'rbtversion' != NULL).
 *      DNS_DBADD_FORCE         use dns_trust_ultimate instead of the new
 *                              header's own trust in the trust comparisons.
 *      DNS_DBADD_EXACT         passed to the merge as DNS_RDATASLAB_EXACT.
 *      DNS_DBADD_EXACTTTL      a merge with differing TTLs fails with
 *                              DNS_R_NOTEXACT.
 *
 * When 'loading' is true no changed record is created, and a header that
 * gets replaced is freed immediately rather than kept on the 'down' chain.
 *
 * 'addedrdataset', if not NULL, is bound to the header that ends up
 * representing the data: 'newheader' when it was linked in, or the
 * existing header when that one was kept.
 *
 * Ownership: on every return path on which 'newheader' was not linked into
 * the node it has been released with free_rdataset().
 *
 * Returns:
 *      ISC_R_SUCCESS           'newheader' (possibly replaced by the merged
 *                              rdataset) was linked in, or an unexpired
 *                              cached NS/A/AAAA rdataset with equal data
 *                              and at least equal trust was kept.
 *      DNS_R_UNCHANGED         nothing was added.
 *      ISC_R_NOMEMORY          add_changed() failed.
 *      DNS_R_NOTEXACT or another failure result from dns_rdataslab_merge().
 *      DNS_R_CNAMEANDOTHER     the header was linked in, but the node now
 *                              has a CNAME and other data in 'rbtversion'.
 */
5846 static isc_result_t
5847 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5848     rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5849     dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5850 {
5851         rbtdb_changed_t *changed = NULL;
5852         rdatasetheader_t *topheader, *topheader_prev, *header, *sigheader;
5853         unsigned char *merged;
5854         isc_result_t result;
5855         isc_boolean_t header_nx;
5856         isc_boolean_t newheader_nx;
5857         isc_boolean_t merge;
5858         dns_rdatatype_t rdtype, covers;
5859         rbtdb_rdatatype_t negtype, sigtype;
5860         dns_trust_t trust;
5861         int idx;
5862
5863         /*
5864          * Add an rdatasetheader_t to a node.
5865          */
5866
5867         /*
5868          * Caller must be holding the node lock.
5869          */
5870
5871         if ((options & DNS_DBADD_MERGE) != 0) {
5872                 REQUIRE(rbtversion != NULL);
5873                 merge = ISC_TRUE;
5874         } else
5875                 merge = ISC_FALSE;
5876
        /*
         * 'trust' is the level compared against existing headers below;
         * DNS_DBADD_FORCE substitutes dns_trust_ultimate for the new
         * header's own trust.
         */
5877         if ((options & DNS_DBADD_FORCE) != 0)
5878                 trust = dns_trust_ultimate;
5879         else
5880                 trust = newheader->trust;
5881
5882         if (rbtversion != NULL && !loading) {
5883                 /*
5884                  * We always add a changed record, even if no changes end up
5885                  * being made to this node, because it's harmless and
5886                  * simplifies the code.
5887                  */
5888                 changed = add_changed(rbtdb, rbtversion, rbtnode);
5889                 if (changed == NULL) {
5890                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5891                         return (ISC_R_NOMEMORY);
5892                 }
5893         }
5894
5895         newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5896         topheader_prev = NULL;
5897         sigheader = NULL;
5898         negtype = 0;
5899         if (rbtversion == NULL && !newheader_nx) {
5900                 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5901                 if (NEGATIVE(newheader)) {
5902                         /*
5903                          * We're adding a negative cache entry.
5904                          */
5905                         covers = RBTDB_RDATATYPE_EXT(newheader->type);
5906                         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
5907                                                         covers);
5908                         for (topheader = rbtnode->data;
5909                              topheader != NULL;
5910                              topheader = topheader->next) {
5911                                 /*
5912                                  * If we're adding a negative cache entry
5913                                  * which covers all types (NXDOMAIN,
5914                                  * NODATA(QTYPE=ANY)).
5915                                  *
5916                                  * We make all other data stale so that the
5917                                  * only rdataset that can be found at this
5918                                  * node is the negative cache entry.
5919                                  *
5920                                  * Otherwise look for any RRSIGs of the
5921                                  * given type so they can be marked stale
5922                                  * later.
5923                                  */
5924                                 if (covers == dns_rdatatype_any) {
5925                                         set_ttl(rbtdb, topheader, 0);
5926                                         topheader->attributes |=
5927                                                 RDATASET_ATTR_STALE;
5928                                         rbtnode->dirty = 1;
5929                                 } else if (topheader->type == sigtype)
5930                                         sigheader = topheader;
5931                         }
5932                         if (covers == dns_rdatatype_any)
5933                                 goto find_header;
5934                         negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5935                 } else {
5936                         /*
5937                          * We're adding something that isn't a
5938                          * negative cache entry.  Look for an extant
5939                          * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5940                          * cache entry.
5941                          */
5942                         for (topheader = rbtnode->data;
5943                              topheader != NULL;
5944                              topheader = topheader->next) {
5945                                 if (topheader->type ==
5946                                     RBTDB_RDATATYPE_NCACHEANY)
5947                                         break;
5948                         }
5949                         if (topheader != NULL && EXISTS(topheader) &&
5950                             topheader->rdh_ttl > now) {
5951                                 /*
5952                                  * Found one.
5953                                  */
5954                                 if (trust < topheader->trust) {
5955                                         /*
5956                                          * The NXDOMAIN/NODATA(QTYPE=ANY)
5957                                          * is more trusted.
5958                                          */
5959                                         free_rdataset(rbtdb,
5960                                                       rbtdb->common.mctx,
5961                                                       newheader);
5962                                         if (addedrdataset != NULL)
5963                                                 bind_rdataset(rbtdb, rbtnode,
5964                                                               topheader, now,
5965                                                               addedrdataset);
5966                                         return (DNS_R_UNCHANGED);
5967                                 }
5968                                 /*
5969                                  * The new rdataset is better.  Expire the
5970                                  * NXDOMAIN/NODATA(QTYPE=ANY).
5971                                  */
5972                                 set_ttl(rbtdb, topheader, 0);
5973                                 topheader->attributes |= RDATASET_ATTR_STALE;
5974                                 rbtnode->dirty = 1;
5975                                 topheader = NULL;
5976                                 goto find_header;
5977                         }
5978                         negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5979                 }
5980         }
5981
        /*
         * Find the top header whose type is the one being added (or, for
         * cache data, the corresponding 'negtype'), remembering its
         * predecessor so the list can be spliced.
         */
5982         for (topheader = rbtnode->data;
5983              topheader != NULL;
5984              topheader = topheader->next) {
5985                 if (topheader->type == newheader->type ||
5986                     topheader->type == negtype)
5987                         break;
5988                 topheader_prev = topheader;
5989         }
5990
5991  find_header:
5992         /*
5993          * If header isn't NULL, we've found the right type.  There may be
5994          * IGNORE rdatasets between the top of the chain and the first real
5995          * data.  We skip over them.
         *
         * Both 'goto find_header' paths above arrive here with
         * topheader == NULL, so for them the new header is simply
         * inserted at the head of the node's list below.
5996          */
5997         header = topheader;
5998         while (header != NULL && IGNORE(header))
5999                 header = header->down;
6000         if (header != NULL) {
6001                 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
6002
6003                 /*
6004                  * Deleting an already non-existent rdataset has no effect.
6005                  */
6006                 if (header_nx && newheader_nx) {
6007                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6008                         return (DNS_R_UNCHANGED);
6009                 }
6010
6011                 /*
6012                  * Trying to add an rdataset with lower trust to a cache DB
6013                  * has no effect, provided that the cache data isn't stale.
6014                  */
6015                 if (rbtversion == NULL && trust < header->trust &&
6016                     (header->rdh_ttl > now || header_nx)) {
6017                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6018                         if (addedrdataset != NULL)
6019                                 bind_rdataset(rbtdb, rbtnode, header, now,
6020                                               addedrdataset);
6021                         return (DNS_R_UNCHANGED);
6022                 }
6023
6024                 /*
6025                  * Don't merge if a nonexistent rdataset is involved.
6026                  */
6027                 if (merge && (header_nx || newheader_nx))
6028                         merge = ISC_FALSE;
6029
6030                 /*
6031                  * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
6032                  * that is the union of 'newheader' and 'header'.
6033                  */
6034                 if (merge) {
6035                         unsigned int flags = 0;
6036                         INSIST(rbtversion->serial >= header->serial);
6037                         merged = NULL;
6038                         result = ISC_R_SUCCESS;
6039
6040                         if ((options & DNS_DBADD_EXACT) != 0)
6041                                 flags |= DNS_RDATASLAB_EXACT;
6042                         if ((options & DNS_DBADD_EXACTTTL) != 0 &&
6043                              newheader->rdh_ttl != header->rdh_ttl)
6044                                         result = DNS_R_NOTEXACT;
6045                         else if (newheader->rdh_ttl != header->rdh_ttl)
6046                                 flags |= DNS_RDATASLAB_FORCE;
6047                         if (result == ISC_R_SUCCESS)
6048                                 result = dns_rdataslab_merge(
6049                                              (unsigned char *)header,
6050                                              (unsigned char *)newheader,
6051                                              (unsigned int)(sizeof(*newheader)),
6052                                              rbtdb->common.mctx,
6053                                              rbtdb->common.rdclass,
6054                                              (dns_rdatatype_t)header->type,
6055                                              flags, &merged);
6056                         if (result == ISC_R_SUCCESS) {
6057                                 /*
6058                                  * If 'header' has the same serial number as
6059                                  * we do, we could clean it up now if we knew
6060                                  * that our caller had no references to it.
6061                                  * We don't know this, however, so we leave it
6062                                  * alone.  It will get cleaned up when
6063                                  * clean_zone_node() runs.
6064                                  */
6065                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6066                                               newheader);
6067                                 newheader = (rdatasetheader_t *)merged;
6068                                 init_rdataset(rbtdb, newheader);
6069                                 if (loading && RESIGN(newheader) &&
6070                                     RESIGN(header) &&
6071                                     header->resign < newheader->resign)
6072                                         newheader->resign = header->resign;
6073                         } else {
6074                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6075                                               newheader);
6076                                 return (result);
6077                         }
6078                 }
6079                 /*
6080                  * Don't replace existing NS, A and AAAA RRsets
6081                  * in the cache if they already exist.  This
6082                  * prevents named being locked to old servers.
6083                  * Don't lower trust of existing record if the
6084                  * update is forced.
6085                  */
6086                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
6087                     header->type == dns_rdatatype_ns &&
6088                     !header_nx && !newheader_nx &&
6089                     header->trust >= newheader->trust &&
6090                     dns_rdataslab_equalx((unsigned char *)header,
6091                                          (unsigned char *)newheader,
6092                                          (unsigned int)(sizeof(*newheader)),
6093                                          rbtdb->common.rdclass,
6094                                          (dns_rdatatype_t)header->type)) {
6095                         /*
6096                          * Honour the new ttl if it is less than the
6097                          * older one.
6098                          */
6099                         if (header->rdh_ttl > newheader->rdh_ttl)
6100                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
                        /*
                         * Move the new header's noqname/closest data over
                         * to the kept header when the latter has none, so
                         * it is not released with 'newheader'.
                         */
6101                         if (header->noqname == NULL &&
6102                             newheader->noqname != NULL) {
6103                                 header->noqname = newheader->noqname;
6104                                 newheader->noqname = NULL;
6105                         }
6106                         if (header->closest == NULL &&
6107                             newheader->closest != NULL) {
6108                                 header->closest = newheader->closest;
6109                                 newheader->closest = NULL;
6110                         }
6111                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6112                         if (addedrdataset != NULL)
6113                                 bind_rdataset(rbtdb, rbtnode, header, now,
6114                                               addedrdataset);
6115                         return (ISC_R_SUCCESS);
6116                 }
6117                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
6118                     (header->type == dns_rdatatype_a ||
6119                      header->type == dns_rdatatype_aaaa) &&
6120                     !header_nx && !newheader_nx &&
6121                     header->trust >= newheader->trust &&
6122                     dns_rdataslab_equal((unsigned char *)header,
6123                                         (unsigned char *)newheader,
6124                                         (unsigned int)(sizeof(*newheader)))) {
6125                         /*
6126                          * Honour the new ttl if it is less than the
6127                          * older one.
6128                          */
6129                         if (header->rdh_ttl > newheader->rdh_ttl)
6130                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
6131                         if (header->noqname == NULL &&
6132                             newheader->noqname != NULL) {
6133                                 header->noqname = newheader->noqname;
6134                                 newheader->noqname = NULL;
6135                         }
6136                         if (header->closest == NULL &&
6137                             newheader->closest != NULL) {
6138                                 header->closest = newheader->closest;
6139                                 newheader->closest = NULL;
6140                         }
6141                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6142                         if (addedrdataset != NULL)
6143                                 bind_rdataset(rbtdb, rbtnode, header, now,
6144                                               addedrdataset);
6145                         return (ISC_R_SUCCESS);
6146                 }
6147                 INSIST(rbtversion == NULL ||
6148                        rbtversion->serial >= topheader->serial);
                /*
                 * Splice 'newheader' into the node's list in the position
                 * 'topheader' occupied.
                 */
6149                 if (topheader_prev != NULL)
6150                         topheader_prev->next = newheader;
6151                 else
6152                         rbtnode->data = newheader;
6153                 newheader->next = topheader->next;
6154                 if (loading) {
6155                         /*
6156                          * There are no other references to 'header' when
6157                          * loading, so we MAY clean up 'header' now.
6158                          * Since we don't generate changed records when
6159                          * loading, we MUST clean up 'header' now.
6160                          */
6161                         newheader->down = NULL;
6162                         free_rdataset(rbtdb, rbtdb->common.mctx, header);
6163                 } else {
                        /*
                         * Keep the old chain reachable below the new
                         * header.
                         */
6164                         newheader->down = topheader;
6165                         topheader->next = newheader;
6166                         rbtnode->dirty = 1;
6167                         if (changed != NULL)
6168                                 changed->dirty = ISC_TRUE;
6169                         if (rbtversion == NULL) {
6170                                 set_ttl(rbtdb, header, 0);
6171                                 header->attributes |= RDATASET_ATTR_STALE;
6172                                 if (sigheader != NULL) {
6173                                         set_ttl(rbtdb, sigheader, 0);
6174                                         sigheader->attributes |=
6175                                                  RDATASET_ATTR_STALE;
6176                                 }
6177                         }
6178                         idx = newheader->node->locknum;
6179                         if (IS_CACHE(rbtdb)) {
6180                                 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
6181                                                  newheader, link);
6182                                 /*
6183                                  * XXXMLG We don't check the return value
6184                                  * here.  If it fails, we will not do TTL
6185                                  * based expiry on this node.  However, we
6186                                  * will do it on the LRU side, so memory
6187                                  * will not leak... for long.
6188                                  */
6189                                 isc_heap_insert(rbtdb->heaps[idx], newheader);
6190                         } else if (RESIGN(newheader))
6191                                 resign_insert(rbtdb, idx, newheader);
6192                 }
6193         } else {
6194                 /*
6195                  * No non-IGNORED rdatasets of the given type exist at
6196                  * this node.
6197                  */
6198
6199                 /*
6200                  * If we're trying to delete the type, don't bother.
6201                  */
6202                 if (newheader_nx) {
6203                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6204                         return (DNS_R_UNCHANGED);
6205                 }
6206
6207                 if (topheader != NULL) {
6208                         /*
6209                          * We have a list of rdatasets of the given type,
6210                          * but they're all marked IGNORE.  We simply insert
6211                          * the new rdataset at the head of the list.
6212                          *
6213                          * Ignored rdatasets cannot occur during loading, so
6214                          * we INSIST on it.
6215                          */
6216                         INSIST(!loading);
6217                         INSIST(rbtversion == NULL ||
6218                                rbtversion->serial >= topheader->serial);
6219                         if (topheader_prev != NULL)
6220                                 topheader_prev->next = newheader;
6221                         else
6222                                 rbtnode->data = newheader;
6223                         newheader->next = topheader->next;
6224                         newheader->down = topheader;
6225                         topheader->next = newheader;
6226                         rbtnode->dirty = 1;
6227                         if (changed != NULL)
6228                                 changed->dirty = ISC_TRUE;
6229                 } else {
6230                         /*
6231                          * No rdatasets of the given type exist at the node.
6232                          */
6233                         newheader->next = rbtnode->data;
6234                         newheader->down = NULL;
6235                         rbtnode->data = newheader;
6236                 }
6237                 idx = newheader->node->locknum;
6238                 if (IS_CACHE(rbtdb)) {
6239                         ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
6240                                          newheader, link);
6241                         isc_heap_insert(rbtdb->heaps[idx], newheader);
6242                 } else if (RESIGN(newheader)) {
6243                         resign_insert(rbtdb, idx, newheader);
6244                 }
6245         }
6246
6247         /*
6248          * Check if the node now contains CNAME and other data.
         * Note that 'newheader' stays linked into the node when this
         * check fails.
6249          */
6250         if (rbtversion != NULL &&
6251             cname_and_other_data(rbtnode, rbtversion->serial))
6252                 return (DNS_R_CNAMEANDOTHER);
6253
6254         if (addedrdataset != NULL)
6255                 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
6256
6257         return (ISC_R_SUCCESS);
6258 }
6259
6260 static inline isc_boolean_t
6261 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
6262                 rbtdb_rdatatype_t type)
6263 {
6264         if (IS_CACHE(rbtdb)) {
6265                 if (type == dns_rdatatype_dname)
6266                         return (ISC_TRUE);
6267                 else
6268                         return (ISC_FALSE);
6269         } else if (type == dns_rdatatype_dname ||
6270                    (type == dns_rdatatype_ns &&
6271                     (node != rbtdb->origin_node || IS_STUB(rbtdb))))
6272                 return (ISC_TRUE);
6273         return (ISC_FALSE);
6274 }
6275
6276 static inline isc_result_t
6277 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6278            dns_rdataset_t *rdataset)
6279 {
6280         struct noqname *noqname;
6281         isc_mem_t *mctx = rbtdb->common.mctx;
6282         dns_name_t name;
6283         dns_rdataset_t neg, negsig;
6284         isc_result_t result;
6285         isc_region_t r;
6286
6287         dns_name_init(&name, NULL);
6288         dns_rdataset_init(&neg);
6289         dns_rdataset_init(&negsig);
6290
6291         result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
6292         RUNTIME_CHECK(result == ISC_R_SUCCESS);
6293
6294         noqname = isc_mem_get(mctx, sizeof(*noqname));
6295         if (noqname == NULL) {
6296                 result = ISC_R_NOMEMORY;
6297                 goto cleanup;
6298         }
6299         dns_name_init(&noqname->name, NULL);
6300         noqname->neg = NULL;
6301         noqname->negsig = NULL;
6302         noqname->type = neg.type;
6303         result = dns_name_dup(&name, mctx, &noqname->name);
6304         if (result != ISC_R_SUCCESS)
6305                 goto cleanup;
6306         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6307         if (result != ISC_R_SUCCESS)
6308                 goto cleanup;
6309         noqname->neg = r.base;
6310         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6311         if (result != ISC_R_SUCCESS)
6312                 goto cleanup;
6313         noqname->negsig = r.base;
6314         dns_rdataset_disassociate(&neg);
6315         dns_rdataset_disassociate(&negsig);
6316         newheader->noqname = noqname;
6317         return (ISC_R_SUCCESS);
6318
6319 cleanup:
6320         dns_rdataset_disassociate(&neg);
6321         dns_rdataset_disassociate(&negsig);
6322         free_noqname(mctx, &noqname);
6323         return(result);
6324 }
6325
6326 static inline isc_result_t
6327 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6328            dns_rdataset_t *rdataset)
6329 {
6330         struct noqname *closest;
6331         isc_mem_t *mctx = rbtdb->common.mctx;
6332         dns_name_t name;
6333         dns_rdataset_t neg, negsig;
6334         isc_result_t result;
6335         isc_region_t r;
6336
6337         dns_name_init(&name, NULL);
6338         dns_rdataset_init(&neg);
6339         dns_rdataset_init(&negsig);
6340
6341         result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
6342         RUNTIME_CHECK(result == ISC_R_SUCCESS);
6343
6344         closest = isc_mem_get(mctx, sizeof(*closest));
6345         if (closest == NULL) {
6346                 result = ISC_R_NOMEMORY;
6347                 goto cleanup;
6348         }
6349         dns_name_init(&closest->name, NULL);
6350         closest->neg = NULL;
6351         closest->negsig = NULL;
6352         closest->type = neg.type;
6353         result = dns_name_dup(&name, mctx, &closest->name);
6354         if (result != ISC_R_SUCCESS)
6355                 goto cleanup;
6356         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6357         if (result != ISC_R_SUCCESS)
6358                 goto cleanup;
6359         closest->neg = r.base;
6360         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6361         if (result != ISC_R_SUCCESS)
6362                 goto cleanup;
6363         closest->negsig = r.base;
6364         dns_rdataset_disassociate(&neg);
6365         dns_rdataset_disassociate(&negsig);
6366         newheader->closest = closest;
6367         return (ISC_R_SUCCESS);
6368
6369  cleanup:
6370         dns_rdataset_disassociate(&neg);
6371         dns_rdataset_disassociate(&negsig);
6372         free_noqname(mctx, &closest);
6373         return(result);
6374 }
6375
6376 static dns_dbmethods_t zone_methods;
6377
6378 static isc_result_t
6379 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6380             isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
6381             dns_rdataset_t *addedrdataset)
6382 {
6383         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6384         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6385         rbtdb_version_t *rbtversion = version;
6386         isc_region_t region;
6387         rdatasetheader_t *newheader;
6388         rdatasetheader_t *header;
6389         isc_result_t result;
6390         isc_boolean_t delegating;
6391         isc_boolean_t newnsec;
6392         isc_boolean_t tree_locked = ISC_FALSE;
6393         isc_boolean_t cache_is_overmem = ISC_FALSE;
6394
6395         REQUIRE(VALID_RBTDB(rbtdb));
6396
6397         if (rbtdb->common.methods == &zone_methods)
6398                 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6399                           (rdataset->type == dns_rdatatype_nsec3 ||
6400                            rdataset->covers == dns_rdatatype_nsec3)) ||
6401                          (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6402                            rdataset->type != dns_rdatatype_nsec3 &&
6403                            rdataset->covers != dns_rdatatype_nsec3)));
6404
6405         if (rbtversion == NULL) {
6406                 if (now == 0)
6407                         isc_stdtime_get(&now);
6408         } else
6409                 now = 0;
6410
6411         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6412                                             &region,
6413                                             sizeof(rdatasetheader_t));
6414         if (result != ISC_R_SUCCESS)
6415                 return (result);
6416
6417         newheader = (rdatasetheader_t *)region.base;
6418         init_rdataset(rbtdb, newheader);
6419         set_ttl(rbtdb, newheader, rdataset->ttl + now);
6420         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6421                                                 rdataset->covers);
6422         newheader->attributes = 0;
6423         newheader->noqname = NULL;
6424         newheader->closest = NULL;
6425         newheader->count = init_count++;
6426         newheader->trust = rdataset->trust;
6427         newheader->additional_auth = NULL;
6428         newheader->additional_glue = NULL;
6429         newheader->last_used = now;
6430         newheader->node = rbtnode;
6431         if (rbtversion != NULL) {
6432                 newheader->serial = rbtversion->serial;
6433                 now = 0;
6434
6435                 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6436                         newheader->attributes |= RDATASET_ATTR_RESIGN;
6437                         newheader->resign = rdataset->resign;
6438                 } else
6439                         newheader->resign = 0;
6440         } else {
6441                 newheader->serial = 1;
6442                 newheader->resign = 0;
6443                 if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0)
6444                         newheader->attributes |= RDATASET_ATTR_NEGATIVE;
6445                 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
6446                         newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6447                 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6448                         newheader->attributes |= RDATASET_ATTR_OPTOUT;
6449                 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6450                         result = addnoqname(rbtdb, newheader, rdataset);
6451                         if (result != ISC_R_SUCCESS) {
6452                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6453                                               newheader);
6454                                 return (result);
6455                         }
6456                 }
6457                 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6458                         result = addclosest(rbtdb, newheader, rdataset);
6459                         if (result != ISC_R_SUCCESS) {
6460                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6461                                               newheader);
6462                                 return (result);
6463                         }
6464                 }
6465         }
6466
6467         /*
6468          * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6469          * just DNAME for the cache), then we need to set the callback bit
6470          * on the node.
6471          */
6472         if (delegating_type(rbtdb, rbtnode, rdataset->type))
6473                 delegating = ISC_TRUE;
6474         else
6475                 delegating = ISC_FALSE;
6476
6477         /*
6478          * Add to the auxiliary NSEC tree if we're adding an NSEC record.
6479          */
6480         if (rbtnode->nsec != DNS_RBT_NSEC_HAS_NSEC &&
6481             rdataset->type == dns_rdatatype_nsec)
6482                 newnsec = ISC_TRUE;
6483         else
6484                 newnsec = ISC_FALSE;
6485
6486         /*
6487          * If we're adding a delegation type, adding to the auxiliary NSEC tree,
6488          * or the DB is a cache in an overmem state, hold an exclusive lock on
6489          * the tree.  In the latter case the lock does not necessarily have to
6490          * be acquired but it will help purge stale entries more effectively.
6491          */
6492         if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx))
6493                 cache_is_overmem = ISC_TRUE;
6494         if (delegating || newnsec || cache_is_overmem) {
6495                 tree_locked = ISC_TRUE;
6496                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6497         }
6498
6499         if (cache_is_overmem)
6500                 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6501
6502         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6503                   isc_rwlocktype_write);
6504
6505         if (rbtdb->rrsetstats != NULL) {
6506                 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6507                 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6508         }
6509
6510         if (IS_CACHE(rbtdb)) {
6511                 if (tree_locked)
6512                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6513
6514                 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6515                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6516                         expire_header(rbtdb, header, tree_locked);
6517
6518                 /*
6519                  * If we've been holding a write lock on the tree just for
6520                  * cleaning, we can release it now.  However, we still need the
6521                  * node lock.
6522                  */
6523                 if (tree_locked && !delegating && !newnsec) {
6524                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6525                         tree_locked = ISC_FALSE;
6526                 }
6527         }
6528
6529         result = ISC_R_SUCCESS;
6530         if (newnsec) {
6531                 dns_fixedname_t fname;
6532                 dns_name_t *name;
6533                 dns_rbtnode_t *nsecnode;
6534
6535                 dns_fixedname_init(&fname);
6536                 name = dns_fixedname_name(&fname);
6537                 dns_rbt_fullnamefromnode(rbtnode, name);
6538                 nsecnode = NULL;
6539                 result = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
6540                 if (result == ISC_R_SUCCESS) {
6541                         nsecnode->nsec = DNS_RBT_NSEC_NSEC;
6542                         rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6543                 } else if (result == ISC_R_EXISTS) {
6544                         rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6545                         result = ISC_R_SUCCESS;
6546                 }
6547         }
6548
6549         if (result == ISC_R_SUCCESS)
6550                 result = add(rbtdb, rbtnode, rbtversion, newheader, options,
6551                              ISC_FALSE, addedrdataset, now);
6552         if (result == ISC_R_SUCCESS && delegating)
6553                 rbtnode->find_callback = 1;
6554
6555         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6556                     isc_rwlocktype_write);
6557
6558         if (tree_locked)
6559                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6560
6561         /*
6562          * Update the zone's secure status.  If version is non-NULL
6563          * this is deferred until closeversion() is called.
6564          */
6565         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6566                 iszonesecure(db, version, rbtdb->origin_node);
6567
6568         return (result);
6569 }
6570
6571 static isc_result_t
6572 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6573                  dns_rdataset_t *rdataset, unsigned int options,
6574                  dns_rdataset_t *newrdataset)
6575 {
6576         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6577         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6578         rbtdb_version_t *rbtversion = version;
6579         rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6580         unsigned char *subresult;
6581         isc_region_t region;
6582         isc_result_t result;
6583         rbtdb_changed_t *changed;
6584
6585         REQUIRE(VALID_RBTDB(rbtdb));
6586
6587         if (rbtdb->common.methods == &zone_methods)
6588                 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6589                           (rdataset->type == dns_rdatatype_nsec3 ||
6590                            rdataset->covers == dns_rdatatype_nsec3)) ||
6591                          (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6592                            rdataset->type != dns_rdatatype_nsec3 &&
6593                            rdataset->covers != dns_rdatatype_nsec3)));
6594
6595         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6596                                             &region,
6597                                             sizeof(rdatasetheader_t));
6598         if (result != ISC_R_SUCCESS)
6599                 return (result);
6600         newheader = (rdatasetheader_t *)region.base;
6601         init_rdataset(rbtdb, newheader);
6602         set_ttl(rbtdb, newheader, rdataset->ttl);
6603         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6604                                                 rdataset->covers);
6605         newheader->attributes = 0;
6606         newheader->serial = rbtversion->serial;
6607         newheader->trust = 0;
6608         newheader->noqname = NULL;
6609         newheader->closest = NULL;
6610         newheader->count = init_count++;
6611         newheader->additional_auth = NULL;
6612         newheader->additional_glue = NULL;
6613         newheader->last_used = 0;
6614         newheader->node = rbtnode;
6615         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6616                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6617                 newheader->resign = rdataset->resign;
6618         } else
6619                 newheader->resign = 0;
6620
6621         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6622                   isc_rwlocktype_write);
6623
6624         changed = add_changed(rbtdb, rbtversion, rbtnode);
6625         if (changed == NULL) {
6626                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6627                 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6628                             isc_rwlocktype_write);
6629                 return (ISC_R_NOMEMORY);
6630         }
6631
6632         topheader_prev = NULL;
6633         for (topheader = rbtnode->data;
6634              topheader != NULL;
6635              topheader = topheader->next) {
6636                 if (topheader->type == newheader->type)
6637                         break;
6638                 topheader_prev = topheader;
6639         }
6640         /*
6641          * If header isn't NULL, we've found the right type.  There may be
6642          * IGNORE rdatasets between the top of the chain and the first real
6643          * data.  We skip over them.
6644          */
6645         header = topheader;
6646         while (header != NULL && IGNORE(header))
6647                 header = header->down;
6648         if (header != NULL && EXISTS(header)) {
6649                 unsigned int flags = 0;
6650                 subresult = NULL;
6651                 result = ISC_R_SUCCESS;
6652                 if ((options & DNS_DBSUB_EXACT) != 0) {
6653                         flags |= DNS_RDATASLAB_EXACT;
6654                         if (newheader->rdh_ttl != header->rdh_ttl)
6655                                 result = DNS_R_NOTEXACT;
6656                 }
6657                 if (result == ISC_R_SUCCESS)
6658                         result = dns_rdataslab_subtract(
6659                                         (unsigned char *)header,
6660                                         (unsigned char *)newheader,
6661                                         (unsigned int)(sizeof(*newheader)),
6662                                         rbtdb->common.mctx,
6663                                         rbtdb->common.rdclass,
6664                                         (dns_rdatatype_t)header->type,
6665                                         flags, &subresult);
6666                 if (result == ISC_R_SUCCESS) {
6667                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6668                         newheader = (rdatasetheader_t *)subresult;
6669                         init_rdataset(rbtdb, newheader);
6670                         /*
6671                          * We have to set the serial since the rdataslab
6672                          * subtraction routine copies the reserved portion of
6673                          * header, not newheader.
6674                          */
6675                         newheader->serial = rbtversion->serial;
6676                         /*
6677                          * XXXJT: dns_rdataslab_subtract() copied the pointers
6678                          * to additional info.  We need to clear these fields
6679                          * to avoid having duplicated references.
6680                          */
6681                         newheader->additional_auth = NULL;
6682                         newheader->additional_glue = NULL;
6683                 } else if (result == DNS_R_NXRRSET) {
6684                         /*
6685                          * This subtraction would remove all of the rdata;
6686                          * add a nonexistent header instead.
6687                          */
6688                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6689                         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6690                         if (newheader == NULL) {
6691                                 result = ISC_R_NOMEMORY;
6692                                 goto unlock;
6693                         }
6694                         set_ttl(rbtdb, newheader, 0);
6695                         newheader->type = topheader->type;
6696                         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6697                         newheader->trust = 0;
6698                         newheader->serial = rbtversion->serial;
6699                         newheader->noqname = NULL;
6700                         newheader->closest = NULL;
6701                         newheader->count = 0;
6702                         newheader->additional_auth = NULL;
6703                         newheader->additional_glue = NULL;
6704                         newheader->node = rbtnode;
6705                         newheader->resign = 0;
6706                         newheader->last_used = 0;
6707                 } else {
6708                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6709                         goto unlock;
6710                 }
6711
6712                 /*
6713                  * If we're here, we want to link newheader in front of
6714                  * topheader.
6715                  */
6716                 INSIST(rbtversion->serial >= topheader->serial);
6717                 if (topheader_prev != NULL)
6718                         topheader_prev->next = newheader;
6719                 else
6720                         rbtnode->data = newheader;
6721                 newheader->next = topheader->next;
6722                 newheader->down = topheader;
6723                 topheader->next = newheader;
6724                 rbtnode->dirty = 1;
6725                 changed->dirty = ISC_TRUE;
6726         } else {
6727                 /*
6728                  * The rdataset doesn't exist, so we don't need to do anything
6729                  * to satisfy the deletion request.
6730                  */
6731                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6732                 if ((options & DNS_DBSUB_EXACT) != 0)
6733                         result = DNS_R_NOTEXACT;
6734                 else
6735                         result = DNS_R_UNCHANGED;
6736         }
6737
6738         if (result == ISC_R_SUCCESS && newrdataset != NULL)
6739                 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6740
6741  unlock:
6742         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6743                     isc_rwlocktype_write);
6744
6745         /*
6746          * Update the zone's secure status.  If version is non-NULL
6747          * this is deferred until closeversion() is called.
6748          */
6749         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6750                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6751
6752         return (result);
6753 }
6754
6755 static isc_result_t
6756 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6757                dns_rdatatype_t type, dns_rdatatype_t covers)
6758 {
6759         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6760         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6761         rbtdb_version_t *rbtversion = version;
6762         isc_result_t result;
6763         rdatasetheader_t *newheader;
6764
6765         REQUIRE(VALID_RBTDB(rbtdb));
6766
6767         if (type == dns_rdatatype_any)
6768                 return (ISC_R_NOTIMPLEMENTED);
6769         if (type == dns_rdatatype_rrsig && covers == 0)
6770                 return (ISC_R_NOTIMPLEMENTED);
6771
6772         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6773         if (newheader == NULL)
6774                 return (ISC_R_NOMEMORY);
6775         set_ttl(rbtdb, newheader, 0);
6776         newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6777         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6778         newheader->trust = 0;
6779         newheader->noqname = NULL;
6780         newheader->closest = NULL;
6781         newheader->additional_auth = NULL;
6782         newheader->additional_glue = NULL;
6783         if (rbtversion != NULL)
6784                 newheader->serial = rbtversion->serial;
6785         else
6786                 newheader->serial = 0;
6787         newheader->count = 0;
6788         newheader->last_used = 0;
6789         newheader->node = rbtnode;
6790
6791         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6792                   isc_rwlocktype_write);
6793
6794         result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6795                      ISC_FALSE, NULL, 0);
6796
6797         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6798                     isc_rwlocktype_write);
6799
6800         /*
6801          * Update the zone's secure status.  If version is non-NULL
6802          * this is deferred until closeversion() is called.
6803          */
6804         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6805                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6806
6807         return (result);
6808 }
6809
6810 /*
6811  * load a non-NSEC3 node in the main tree and optionally to the auxiliary NSEC
6812  */
6813 static isc_result_t
6814 loadnode(dns_rbtdb_t *rbtdb, dns_name_t *name, dns_rbtnode_t **nodep,
6815          isc_boolean_t hasnsec)
6816 {
6817         isc_result_t noderesult, nsecresult;
6818         dns_rbtnode_t *nsecnode;
6819
6820         noderesult = dns_rbt_addnode(rbtdb->tree, name, nodep);
6821
6822 #ifdef BIND9
6823         if (noderesult == ISC_R_SUCCESS)
6824                 dns_rpz_cidr_addip(rbtdb->rpz_cidr, name);
6825 #endif
6826
6827         if (!hasnsec)
6828                 return (noderesult);
6829         if (noderesult == ISC_R_EXISTS) {
6830                 /*
6831                  * Add a node to the auxiliary NSEC tree for an old node
6832                  * just now getting an NSEC record.
6833                  */
6834                 if ((*nodep)->nsec == DNS_RBT_NSEC_HAS_NSEC)
6835                         return (noderesult);
6836         } else if (noderesult != ISC_R_SUCCESS) {
6837                 return (noderesult);
6838         }
6839
6840         /*
6841          * Build the auxiliary tree for NSECs as we go.
6842          * This tree speeds searches for closest NSECs that would otherwise
6843          * need to examine many irrelevant nodes in large TLDs.
6844          *
6845          * Add nodes to the auxiliary tree after corresponding nodes have
6846          * been added to the main tree.
6847          */
6848         nsecnode = NULL;
6849         nsecresult = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
6850         if (nsecresult == ISC_R_SUCCESS) {
6851                 nsecnode->nsec = DNS_RBT_NSEC_NSEC;
6852                 (*nodep)->nsec = DNS_RBT_NSEC_HAS_NSEC;
6853                 return (noderesult);
6854         }
6855
6856         if (nsecresult == ISC_R_EXISTS) {
6857 #if 1 /* 0 */
6858                 isc_log_write(dns_lctx,
6859                               DNS_LOGCATEGORY_DATABASE,
6860                               DNS_LOGMODULE_CACHE,
6861                               ISC_LOG_WARNING,
6862                               "addnode: NSEC node already exists");
6863 #endif
6864                 (*nodep)->nsec = DNS_RBT_NSEC_HAS_NSEC;
6865                 return (noderesult);
6866         }
6867
6868         nsecresult = dns_rbt_deletenode(rbtdb->tree, *nodep, ISC_FALSE);
6869         if (nsecresult != ISC_R_SUCCESS)
6870                 isc_log_write(dns_lctx,
6871                               DNS_LOGCATEGORY_DATABASE,
6872                               DNS_LOGMODULE_CACHE,
6873                               ISC_LOG_WARNING,
6874                               "loading_addrdataset: "
6875                               "dns_rbt_deletenode: %s after "
6876                               "dns_rbt_addnode(NSEC): %s",
6877                               isc_result_totext(nsecresult),
6878                               isc_result_totext(noderesult));
6879         return (noderesult);
6880 }
6881
6882 static isc_result_t
6883 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6884         rbtdb_load_t *loadctx = arg;
6885         dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6886         dns_rbtnode_t *node;
6887         isc_result_t result;
6888         isc_region_t region;
6889         rdatasetheader_t *newheader;
6890
6891         /*
6892          * This routine does no node locking.  See comments in
6893          * 'load' below for more information on loading and
6894          * locking.
6895          */
6896
6897
6898         /*
6899          * SOA records are only allowed at top of zone.
6900          */
6901         if (rdataset->type == dns_rdatatype_soa &&
6902             !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6903                 return (DNS_R_NOTZONETOP);
6904
6905         if (rdataset->type != dns_rdatatype_nsec3 &&
6906             rdataset->covers != dns_rdatatype_nsec3)
6907                 add_empty_wildcards(rbtdb, name);
6908
6909         if (dns_name_iswildcard(name)) {
6910                 /*
6911                  * NS record owners cannot legally be wild cards.
6912                  */
6913                 if (rdataset->type == dns_rdatatype_ns)
6914                         return (DNS_R_INVALIDNS);
6915                 /*
6916                  * NSEC3 record owners cannot legally be wild cards.
6917                  */
6918                 if (rdataset->type == dns_rdatatype_nsec3)
6919                         return (DNS_R_INVALIDNSEC3);
6920                 result = add_wildcard_magic(rbtdb, name);
6921                 if (result != ISC_R_SUCCESS)
6922                         return (result);
6923         }
6924
6925         node = NULL;
6926         if (rdataset->type == dns_rdatatype_nsec3 ||
6927             rdataset->covers == dns_rdatatype_nsec3) {
6928                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6929                 if (result == ISC_R_SUCCESS)
6930                         node->nsec = DNS_RBT_NSEC_NSEC3;
6931         } else if (rdataset->type == dns_rdatatype_nsec) {
6932                 result = loadnode(rbtdb, name, &node, ISC_TRUE);
6933         } else {
6934                 result = loadnode(rbtdb, name, &node, ISC_FALSE);
6935         }
6936         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
6937                 return (result);
6938         if (result == ISC_R_SUCCESS) {
6939                 dns_name_t foundname;
6940                 dns_name_init(&foundname, NULL);
6941                 dns_rbt_namefromnode(node, &foundname);
6942 #ifdef DNS_RBT_USEHASH
6943                 node->locknum = node->hashval % rbtdb->node_lock_count;
6944 #else
6945                 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6946                         rbtdb->node_lock_count;
6947 #endif
6948         }
6949
6950         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6951                                             &region,
6952                                             sizeof(rdatasetheader_t));
6953         if (result != ISC_R_SUCCESS)
6954                 return (result);
6955         newheader = (rdatasetheader_t *)region.base;
6956         init_rdataset(rbtdb, newheader);
6957         set_ttl(rbtdb, newheader,
6958                 rdataset->ttl + loadctx->now); /* XXX overflow check */
6959         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6960                                                 rdataset->covers);
6961         newheader->attributes = 0;
6962         newheader->trust = rdataset->trust;
6963         newheader->serial = 1;
6964         newheader->noqname = NULL;
6965         newheader->closest = NULL;
6966         newheader->count = init_count++;
6967         newheader->additional_auth = NULL;
6968         newheader->additional_glue = NULL;
6969         newheader->last_used = 0;
6970         newheader->node = node;
6971         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6972                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6973                 newheader->resign = rdataset->resign;
6974         } else
6975                 newheader->resign = 0;
6976
6977         result = add(rbtdb, node, rbtdb->current_version, newheader,
6978                      DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
6979         if (result == ISC_R_SUCCESS &&
6980             delegating_type(rbtdb, node, rdataset->type))
6981                 node->find_callback = 1;
6982         else if (result == DNS_R_UNCHANGED)
6983                 result = ISC_R_SUCCESS;
6984
6985         return (result);
6986 }
6987
6988 static isc_result_t
6989 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6990         rbtdb_load_t *loadctx;
6991         dns_rbtdb_t *rbtdb;
6992
6993         rbtdb = (dns_rbtdb_t *)db;
6994
6995         REQUIRE(VALID_RBTDB(rbtdb));
6996
6997         loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
6998         if (loadctx == NULL)
6999                 return (ISC_R_NOMEMORY);
7000
7001         loadctx->rbtdb = rbtdb;
7002         if (IS_CACHE(rbtdb))
7003                 isc_stdtime_get(&loadctx->now);
7004         else
7005                 loadctx->now = 0;
7006
7007         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7008
7009         REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
7010                 == 0);
7011         rbtdb->attributes |= RBTDB_ATTR_LOADING;
7012
7013         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7014
7015         *addp = loading_addrdataset;
7016         *dbloadp = loadctx;
7017
7018         return (ISC_R_SUCCESS);
7019 }
7020
7021 static isc_result_t
7022 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
7023         rbtdb_load_t *loadctx;
7024         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7025
7026         REQUIRE(VALID_RBTDB(rbtdb));
7027         REQUIRE(dbloadp != NULL);
7028         loadctx = *dbloadp;
7029         REQUIRE(loadctx->rbtdb == rbtdb);
7030
7031         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7032
7033         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
7034         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
7035
7036         rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
7037         rbtdb->attributes |= RBTDB_ATTR_LOADED;
7038
7039         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7040
7041         /*
7042          * If there's a KEY rdataset at the zone origin containing a
7043          * zone key, we consider the zone secure.
7044          */
7045         if (! IS_CACHE(rbtdb))
7046                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
7047
7048         *dbloadp = NULL;
7049
7050         isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
7051
7052         return (ISC_R_SUCCESS);
7053 }
7054
7055 static isc_result_t
7056 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
7057      dns_masterformat_t masterformat) {
7058         dns_rbtdb_t *rbtdb;
7059
7060         rbtdb = (dns_rbtdb_t *)db;
7061
7062         REQUIRE(VALID_RBTDB(rbtdb));
7063
7064 #ifdef BIND9
7065         return (dns_master_dump2(rbtdb->common.mctx, db, version,
7066                                  &dns_master_style_default,
7067                                  filename, masterformat));
7068 #else
7069         UNUSED(version);
7070         UNUSED(filename);
7071         UNUSED(masterformat);
7072
7073         return (ISC_R_NOTIMPLEMENTED);
7074 #endif /* BIND9 */
7075 }
7076
7077 static void
7078 delete_callback(void *data, void *arg) {
7079         dns_rbtdb_t *rbtdb = arg;
7080         rdatasetheader_t *current, *next;
7081         unsigned int locknum;
7082
7083         current = data;
7084         locknum = current->node->locknum;
7085         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
7086         while (current != NULL) {
7087                 next = current->next;
7088                 free_rdataset(rbtdb, rbtdb->common.mctx, current);
7089                 current = next;
7090         }
7091         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
7092 }
7093
7094 static isc_boolean_t
7095 issecure(dns_db_t *db) {
7096         dns_rbtdb_t *rbtdb;
7097         isc_boolean_t secure;
7098
7099         rbtdb = (dns_rbtdb_t *)db;
7100
7101         REQUIRE(VALID_RBTDB(rbtdb));
7102
7103         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7104         secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
7105         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7106
7107         return (secure);
7108 }
7109
7110 static isc_boolean_t
7111 isdnssec(dns_db_t *db) {
7112         dns_rbtdb_t *rbtdb;
7113         isc_boolean_t dnssec;
7114
7115         rbtdb = (dns_rbtdb_t *)db;
7116
7117         REQUIRE(VALID_RBTDB(rbtdb));
7118
7119         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7120         dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
7121         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7122
7123         return (dnssec);
7124 }
7125
7126 static unsigned int
7127 nodecount(dns_db_t *db) {
7128         dns_rbtdb_t *rbtdb;
7129         unsigned int count;
7130
7131         rbtdb = (dns_rbtdb_t *)db;
7132
7133         REQUIRE(VALID_RBTDB(rbtdb));
7134
7135         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7136         count = dns_rbt_nodecount(rbtdb->tree);
7137         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7138
7139         return (count);
7140 }
7141
7142 static void
7143 settask(dns_db_t *db, isc_task_t *task) {
7144         dns_rbtdb_t *rbtdb;
7145
7146         rbtdb = (dns_rbtdb_t *)db;
7147
7148         REQUIRE(VALID_RBTDB(rbtdb));
7149
7150         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7151         if (rbtdb->task != NULL)
7152                 isc_task_detach(&rbtdb->task);
7153         if (task != NULL)
7154                 isc_task_attach(task, &rbtdb->task);
7155         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7156 }
7157
7158 static isc_boolean_t
7159 ispersistent(dns_db_t *db) {
7160         UNUSED(db);
7161         return (ISC_FALSE);
7162 }
7163
7164 static isc_result_t
7165 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
7166         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7167         dns_rbtnode_t *onode;
7168         isc_result_t result = ISC_R_SUCCESS;
7169
7170         REQUIRE(VALID_RBTDB(rbtdb));
7171         REQUIRE(nodep != NULL && *nodep == NULL);
7172
7173         /* Note that the access to origin_node doesn't require a DB lock */
7174         onode = (dns_rbtnode_t *)rbtdb->origin_node;
7175         if (onode != NULL) {
7176                 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
7177                 new_reference(rbtdb, onode);
7178                 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
7179
7180                 *nodep = rbtdb->origin_node;
7181         } else {
7182                 INSIST(IS_CACHE(rbtdb));
7183                 result = ISC_R_NOTFOUND;
7184         }
7185
7186         return (result);
7187 }
7188
7189 static isc_result_t
7190 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
7191                    isc_uint8_t *flags, isc_uint16_t *iterations,
7192                    unsigned char *salt, size_t *salt_length)
7193 {
7194         dns_rbtdb_t *rbtdb;
7195         isc_result_t result = ISC_R_NOTFOUND;
7196         rbtdb_version_t *rbtversion = version;
7197
7198         rbtdb = (dns_rbtdb_t *)db;
7199
7200         REQUIRE(VALID_RBTDB(rbtdb));
7201
7202         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7203
7204         if (rbtversion == NULL)
7205                 rbtversion = rbtdb->current_version;
7206
7207         if (rbtversion->havensec3) {
7208                 if (hash != NULL)
7209                         *hash = rbtversion->hash;
7210                 if (salt != NULL && salt_length != NULL) {
7211                         REQUIRE(*salt_length >= rbtversion->salt_length);
7212                         memcpy(salt, rbtversion->salt, rbtversion->salt_length);
7213                 }
7214                 if (salt_length != NULL)
7215                         *salt_length = rbtversion->salt_length;
7216                 if (iterations != NULL)
7217                         *iterations = rbtversion->iterations;
7218                 if (flags != NULL)
7219                         *flags = rbtversion->flags;
7220                 result = ISC_R_SUCCESS;
7221         }
7222         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7223
7224         return (result);
7225 }
7226
7227 static isc_result_t
7228 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
7229         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7230         isc_stdtime_t oldresign;
7231         isc_result_t result = ISC_R_SUCCESS;
7232         rdatasetheader_t *header;
7233
7234         REQUIRE(VALID_RBTDB(rbtdb));
7235         REQUIRE(!IS_CACHE(rbtdb));
7236         REQUIRE(rdataset != NULL);
7237
7238         header = rdataset->private3;
7239         header--;
7240
7241         NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
7242                   isc_rwlocktype_write);
7243
7244         oldresign = header->resign;
7245         header->resign = resign;
7246         if (header->heap_index != 0) {
7247                 INSIST(RESIGN(header));
7248                 if (resign == 0) {
7249                         isc_heap_delete(rbtdb->heaps[header->node->locknum],
7250                                         header->heap_index);
7251                         header->heap_index = 0;
7252                 } else if (resign < oldresign)
7253                         isc_heap_increased(rbtdb->heaps[header->node->locknum],
7254                                            header->heap_index);
7255                 else if (resign > oldresign)
7256                         isc_heap_decreased(rbtdb->heaps[header->node->locknum],
7257                                            header->heap_index);
7258         } else if (resign && header->heap_index == 0) {
7259                 header->attributes |= RDATASET_ATTR_RESIGN;
7260                 result = resign_insert(rbtdb, header->node->locknum, header);
7261         }
7262         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
7263                     isc_rwlocktype_write);
7264         return (result);
7265 }
7266
7267 static isc_result_t
7268 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
7269                dns_name_t *foundname)
7270 {
7271         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7272         rdatasetheader_t *header = NULL, *this;
7273         unsigned int i;
7274         isc_result_t result = ISC_R_NOTFOUND;
7275         unsigned int locknum;
7276
7277         REQUIRE(VALID_RBTDB(rbtdb));
7278
7279         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7280
7281         for (i = 0; i < rbtdb->node_lock_count; i++) {
7282                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
7283                 this = isc_heap_element(rbtdb->heaps[i], 1);
7284                 if (this == NULL) {
7285                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
7286                                     isc_rwlocktype_read);
7287                         continue;
7288                 }
7289                 if (header == NULL)
7290                         header = this;
7291                 else if (isc_serial_lt(this->resign, header->resign)) {
7292                         locknum = header->node->locknum;
7293                         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
7294                                     isc_rwlocktype_read);
7295                         header = this;
7296                 } else
7297                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
7298                                     isc_rwlocktype_read);
7299         }
7300
7301         if (header == NULL)
7302                 goto unlock;
7303
7304         bind_rdataset(rbtdb, header->node, header, 0, rdataset);
7305
7306         if (foundname != NULL)
7307                 dns_rbt_fullnamefromnode(header->node, foundname);
7308
7309         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
7310                     isc_rwlocktype_read);
7311
7312         result = ISC_R_SUCCESS;
7313
7314  unlock:
7315         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7316
7317         return (result);
7318 }
7319
7320 static void
7321 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
7322 {
7323         rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
7324         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7325         dns_rbtnode_t *node;
7326         rdatasetheader_t *header;
7327
7328         REQUIRE(VALID_RBTDB(rbtdb));
7329         REQUIRE(rdataset != NULL);
7330         REQUIRE(rbtdb->future_version == rbtversion);
7331         REQUIRE(rbtversion->writer);
7332
7333         node = rdataset->private2;
7334         header = rdataset->private3;
7335         header--;
7336
7337         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7338         NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
7339                   isc_rwlocktype_write);
7340         /*
7341          * Delete from heap and save to re-signed list so that it can
7342          * be restored if we backout of this change.
7343          */
7344         new_reference(rbtdb, node);
7345         isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
7346         header->heap_index = 0;
7347         ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
7348
7349         NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
7350                     isc_rwlocktype_write);
7351         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7352 }
7353
7354 static dns_stats_t *
7355 getrrsetstats(dns_db_t *db) {
7356         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7357
7358         REQUIRE(VALID_RBTDB(rbtdb));
7359         REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
7360
7361         return (rbtdb->rrsetstats);
7362 }
7363
7364 static dns_dbmethods_t zone_methods = {
7365         attach,
7366         detach,
7367         beginload,
7368         endload,
7369         dump,
7370         currentversion,
7371         newversion,
7372         attachversion,
7373         closeversion,
7374         findnode,
7375         zone_find,
7376         zone_findzonecut,
7377         attachnode,
7378         detachnode,
7379         expirenode,
7380         printnode,
7381         createiterator,
7382         zone_findrdataset,
7383         allrdatasets,
7384         addrdataset,
7385         subtractrdataset,
7386         deleterdataset,
7387         issecure,
7388         nodecount,
7389         ispersistent,
7390         overmem,
7391         settask,
7392         getoriginnode,
7393         NULL,
7394         getnsec3parameters,
7395         findnsec3node,
7396         setsigningtime,
7397         getsigningtime,
7398         resigned,
7399         isdnssec,
7400         NULL,
7401 #ifdef BIND9
7402         get_rpz_enabled,
7403         rpz_findips
7404 #else
7405         NULL,
7406         NULL
7407 #endif
7408 };
7409
7410 static dns_dbmethods_t cache_methods = {
7411         attach,
7412         detach,
7413         beginload,
7414         endload,
7415         dump,
7416         currentversion,
7417         newversion,
7418         attachversion,
7419         closeversion,
7420         findnode,
7421         cache_find,
7422         cache_findzonecut,
7423         attachnode,
7424         detachnode,
7425         expirenode,
7426         printnode,
7427         createiterator,
7428         cache_findrdataset,
7429         allrdatasets,
7430         addrdataset,
7431         subtractrdataset,
7432         deleterdataset,
7433         issecure,
7434         nodecount,
7435         ispersistent,
7436         overmem,
7437         settask,
7438         getoriginnode,
7439         NULL,
7440         NULL,
7441         NULL,
7442         NULL,
7443         NULL,
7444         NULL,
7445         isdnssec,
7446         getrrsetstats,
7447         NULL,
7448         NULL
7449 };
7450
7451 isc_result_t
7452 #ifdef DNS_RBTDB_VERSION64
7453 dns_rbtdb64_create
7454 #else
7455 dns_rbtdb_create
7456 #endif
7457                 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
7458                  dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
7459                  void *driverarg, dns_db_t **dbp)
7460 {
7461         dns_rbtdb_t *rbtdb;
7462         isc_result_t result;
7463         int i;
7464         dns_name_t name;
7465         isc_boolean_t (*sooner)(void *, void *);
7466
7467         /* Keep the compiler happy. */
7468         UNUSED(argc);
7469         UNUSED(argv);
7470         UNUSED(driverarg);
7471
7472         rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
7473         if (rbtdb == NULL)
7474                 return (ISC_R_NOMEMORY);
7475
7476         memset(rbtdb, '\0', sizeof(*rbtdb));
7477         dns_name_init(&rbtdb->common.origin, NULL);
7478         rbtdb->common.attributes = 0;
7479         if (type == dns_dbtype_cache) {
7480                 rbtdb->common.methods = &cache_methods;
7481                 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
7482         } else if (type == dns_dbtype_stub) {
7483                 rbtdb->common.methods = &zone_methods;
7484                 rbtdb->common.attributes |= DNS_DBATTR_STUB;
7485         } else
7486                 rbtdb->common.methods = &zone_methods;
7487         rbtdb->common.rdclass = rdclass;
7488         rbtdb->common.mctx = NULL;
7489
7490         result = RBTDB_INITLOCK(&rbtdb->lock);
7491         if (result != ISC_R_SUCCESS)
7492                 goto cleanup_rbtdb;
7493
7494         result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
7495         if (result != ISC_R_SUCCESS)
7496                 goto cleanup_lock;
7497
7498         /*
7499          * Initialize node_lock_count in a generic way to support future
7500          * extension which allows the user to specify this value on creation.
7501          * Note that when specified for a cache DB it must be larger than 1
7502          * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
7503          */
7504         if (rbtdb->node_lock_count == 0) {
7505                 if (IS_CACHE(rbtdb))
7506                         rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
7507                 else
7508                         rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
7509         } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
7510                 result = ISC_R_RANGE;
7511                 goto cleanup_tree_lock;
7512         }
7513         INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
7514         rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
7515                                         sizeof(rbtdb_nodelock_t));
7516         if (rbtdb->node_locks == NULL) {
7517                 result = ISC_R_NOMEMORY;
7518                 goto cleanup_tree_lock;
7519         }
7520
7521         rbtdb->rrsetstats = NULL;
7522         if (IS_CACHE(rbtdb)) {
7523                 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
7524                 if (result != ISC_R_SUCCESS)
7525                         goto cleanup_node_locks;
7526                 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
7527                                                sizeof(rdatasetheaderlist_t));
7528                 if (rbtdb->rdatasets == NULL) {
7529                         result = ISC_R_NOMEMORY;
7530                         goto cleanup_rrsetstats;
7531                 }
7532                 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7533                         ISC_LIST_INIT(rbtdb->rdatasets[i]);
7534         } else
7535                 rbtdb->rdatasets = NULL;
7536
7537         /*
7538          * Create the heaps.
7539          */
7540         rbtdb->heaps = isc_mem_get(mctx, rbtdb->node_lock_count *
7541                                    sizeof(isc_heap_t *));
7542         if (rbtdb->heaps == NULL) {
7543                 result = ISC_R_NOMEMORY;
7544                 goto cleanup_rdatasets;
7545         }
7546         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7547                 rbtdb->heaps[i] = NULL;
7548         sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
7549         for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
7550                 result = isc_heap_create(mctx, sooner, set_index, 0,
7551                                          &rbtdb->heaps[i]);
7552                 if (result != ISC_R_SUCCESS)
7553                         goto cleanup_heaps;
7554         }
7555
7556         /*
7557          * Create deadnode lists.
7558          */
7559         rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
7560                                        sizeof(rbtnodelist_t));
7561         if (rbtdb->deadnodes == NULL) {
7562                 result = ISC_R_NOMEMORY;
7563                 goto cleanup_heaps;
7564         }
7565         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7566                 ISC_LIST_INIT(rbtdb->deadnodes[i]);
7567
7568         rbtdb->active = rbtdb->node_lock_count;
7569
7570         for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
7571                 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
7572                 if (result == ISC_R_SUCCESS) {
7573                         result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
7574                         if (result != ISC_R_SUCCESS)
7575                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7576                 }
7577                 if (result != ISC_R_SUCCESS) {
7578                         while (i-- > 0) {
7579                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7580                                 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7581                                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7582                         }
7583                         goto cleanup_deadnodes;
7584                 }
7585                 rbtdb->node_locks[i].exiting = ISC_FALSE;
7586         }
7587
7588         /*
7589          * Attach to the mctx.  The database will persist so long as there
7590          * are references to it, and attaching to the mctx ensures that our
7591          * mctx won't disappear out from under us.
7592          */
7593         isc_mem_attach(mctx, &rbtdb->common.mctx);
7594
7595         /*
7596          * Must be initialized before free_rbtdb() is called.
7597          */
7598         isc_ondestroy_init(&rbtdb->common.ondest);
7599
7600         /*
7601          * Make a copy of the origin name.
7602          */
7603         result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7604         if (result != ISC_R_SUCCESS) {
7605                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7606                 return (result);
7607         }
7608
7609         /*
7610          * Make the Red-Black Trees.
7611          */
7612         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7613         if (result != ISC_R_SUCCESS) {
7614                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7615                 return (result);
7616         }
7617
7618         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec);
7619         if (result != ISC_R_SUCCESS) {
7620                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7621                 return (result);
7622         }
7623
7624         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7625         if (result != ISC_R_SUCCESS) {
7626                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7627                 return (result);
7628         }
7629
7630 #ifdef BIND9
7631         /*
7632          * Get ready for response policy IP address searching if at least one
7633          * zone has been configured as a response policy zone and this
7634          * is not a cache zone.
7635          * It would be better to know that this database is for a policy
7636          * zone named for a view, but that would require knowledge from
7637          * above such as an argv[] set from data in the zone.
7638          */
7639         if (type == dns_dbtype_zone && !dns_name_equal(origin, dns_rootname)) {
7640                 result = dns_rpz_new_cidr(mctx, origin, &rbtdb->rpz_cidr);
7641                 if (result != ISC_R_SUCCESS) {
7642                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7643                         return (result);
7644                 }
7645         }
7646 #endif
7647
7648         /*
7649          * In order to set the node callback bit correctly in zone databases,
7650          * we need to know if the node has the origin name of the zone.
7651          * In loading_addrdataset() we could simply compare the new name
7652          * to the origin name, but this is expensive.  Also, we don't know the
7653          * node name in addrdataset(), so we need another way of knowing the
7654          * zone's top.
7655          *
7656          * We now explicitly create a node for the zone's origin, and then
7657          * we simply remember the node's address.  This is safe, because
7658          * the top-of-zone node can never be deleted, nor can its address
7659          * change.
7660          */
7661         if (!IS_CACHE(rbtdb)) {
7662                 dns_rbtnode_t *nsec3node;
7663
7664                 rbtdb->origin_node = NULL;
7665                 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7666                                          &rbtdb->origin_node);
7667                 if (result != ISC_R_SUCCESS) {
7668                         INSIST(result != ISC_R_EXISTS);
7669                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7670                         return (result);
7671                 }
7672                 rbtdb->origin_node->nsec = DNS_RBT_NSEC_NORMAL;
7673                 /*
7674                  * We need to give the origin node the right locknum.
7675                  */
7676                 dns_name_init(&name, NULL);
7677                 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7678 #ifdef DNS_RBT_USEHASH
7679                 rbtdb->origin_node->locknum =
7680                         rbtdb->origin_node->hashval %
7681                         rbtdb->node_lock_count;
7682 #else
7683                 rbtdb->origin_node->locknum =
7684                         dns_name_hash(&name, ISC_TRUE) %
7685                         rbtdb->node_lock_count;
7686 #endif
7687                 /*
7688                  * Add an apex node to the NSEC3 tree so that NSEC3 searches
7689                  * return partial matches when there is only a single NSEC3
7690                  * record in the tree.
7691                  */
7692                 nsec3node = NULL;
7693                 result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin,
7694                                          &nsec3node);
7695                 if (result != ISC_R_SUCCESS) {
7696                         INSIST(result != ISC_R_EXISTS);
7697                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7698                         return (result);
7699                 }
7700                 nsec3node->nsec = DNS_RBT_NSEC_NSEC3;
7701                 /*
7702                  * We need to give the nsec3 origin node the right locknum.
7703                  */
7704                 dns_name_init(&name, NULL);
7705                 dns_rbt_namefromnode(nsec3node, &name);
7706 #ifdef DNS_RBT_USEHASH
7707                 nsec3node->locknum = nsec3node->hashval %
7708                         rbtdb->node_lock_count;
7709 #else
7710                 nsec3node->locknum = dns_name_hash(&name, ISC_TRUE) %
7711                         rbtdb->node_lock_count;
7712 #endif
7713         }
7714
7715         /*
7716          * Misc. Initialization.
7717          */
7718         result = isc_refcount_init(&rbtdb->references, 1);
7719         if (result != ISC_R_SUCCESS) {
7720                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7721                 return (result);
7722         }
7723         rbtdb->attributes = 0;
7724         rbtdb->task = NULL;
7725
7726         /*
7727          * Version Initialization.
7728          */
7729         rbtdb->current_serial = 1;
7730         rbtdb->least_serial = 1;
7731         rbtdb->next_serial = 2;
7732         rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7733         if (rbtdb->current_version == NULL) {
7734                 isc_refcount_decrement(&rbtdb->references, NULL);
7735                 isc_refcount_destroy(&rbtdb->references);
7736                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7737                 return (ISC_R_NOMEMORY);
7738         }
7739         rbtdb->current_version->secure = dns_db_insecure;
7740         rbtdb->current_version->havensec3 = ISC_FALSE;
7741         rbtdb->current_version->flags = 0;
7742         rbtdb->current_version->iterations = 0;
7743         rbtdb->current_version->hash = 0;
7744         rbtdb->current_version->salt_length = 0;
7745         memset(rbtdb->current_version->salt, 0,
7746                sizeof(rbtdb->current_version->salt));
7747         rbtdb->future_version = NULL;
7748         ISC_LIST_INIT(rbtdb->open_versions);
7749         /*
7750          * Keep the current version in the open list so that list operation
7751          * won't happen in normal lookup operations.
7752          */
7753         PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7754
7755         rbtdb->common.magic = DNS_DB_MAGIC;
7756         rbtdb->common.impmagic = RBTDB_MAGIC;
7757
7758         *dbp = (dns_db_t *)rbtdb;
7759
7760         return (ISC_R_SUCCESS);
7761
7762  cleanup_deadnodes:
7763         isc_mem_put(mctx, rbtdb->deadnodes,
7764                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7765
7766  cleanup_heaps:
7767         if (rbtdb->heaps != NULL) {
7768                 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7769                         if (rbtdb->heaps[i] != NULL)
7770                                 isc_heap_destroy(&rbtdb->heaps[i]);
7771                 isc_mem_put(mctx, rbtdb->heaps,
7772                             rbtdb->node_lock_count * sizeof(isc_heap_t *));
7773         }
7774
7775  cleanup_rdatasets:
7776         if (rbtdb->rdatasets != NULL)
7777                 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7778                             sizeof(rdatasetheaderlist_t));
7779  cleanup_rrsetstats:
7780         if (rbtdb->rrsetstats != NULL)
7781                 dns_stats_detach(&rbtdb->rrsetstats);
7782
7783  cleanup_node_locks:
7784         isc_mem_put(mctx, rbtdb->node_locks,
7785                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7786
7787  cleanup_tree_lock:
7788         isc_rwlock_destroy(&rbtdb->tree_lock);
7789
7790  cleanup_lock:
7791         RBTDB_DESTROYLOCK(&rbtdb->lock);
7792
7793  cleanup_rbtdb:
7794         isc_mem_put(mctx, rbtdb,  sizeof(*rbtdb));
7795         return (result);
7796 }
7797
7798
7799 /*
7800  * Slabbed Rdataset Methods
7801  */
7802
7803 static void
7804 rdataset_disassociate(dns_rdataset_t *rdataset) {
7805         dns_db_t *db = rdataset->private1;
7806         dns_dbnode_t *node = rdataset->private2;
7807
7808         detachnode(db, &node);
7809 }
7810
7811 static isc_result_t
7812 rdataset_first(dns_rdataset_t *rdataset) {
7813         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7814         unsigned int count;
7815
7816         count = raw[0] * 256 + raw[1];
7817         if (count == 0) {
7818                 rdataset->private5 = NULL;
7819                 return (ISC_R_NOMORE);
7820         }
7821
7822 #if DNS_RDATASET_FIXED
7823         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7824                 raw += 2 + (4 * count);
7825         else
7826 #endif
7827                 raw += 2;
7828
7829         /*
7830          * The privateuint4 field is the number of rdata beyond the
7831          * cursor position, so we decrement the total count by one
7832          * before storing it.
7833          *
7834          * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7835          * first record.  If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7836          * to the first entry in the offset table.
7837          */
7838         count--;
7839         rdataset->privateuint4 = count;
7840         rdataset->private5 = raw;
7841
7842         return (ISC_R_SUCCESS);
7843 }
7844
7845 static isc_result_t
7846 rdataset_next(dns_rdataset_t *rdataset) {
7847         unsigned int count;
7848         unsigned int length;
7849         unsigned char *raw;     /* RDATASLAB */
7850
7851         count = rdataset->privateuint4;
7852         if (count == 0)
7853                 return (ISC_R_NOMORE);
7854         count--;
7855         rdataset->privateuint4 = count;
7856
7857         /*
7858          * Skip forward one record (length + 4) or one offset (4).
7859          */
7860         raw = rdataset->private5;
7861 #if DNS_RDATASET_FIXED
7862         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7863 #endif
7864                 length = raw[0] * 256 + raw[1];
7865                 raw += length;
7866 #if DNS_RDATASET_FIXED
7867         }
7868         rdataset->private5 = raw + 4;           /* length(2) + order(2) */
7869 #else
7870         rdataset->private5 = raw + 2;           /* length(2) */
7871 #endif
7872
7873         return (ISC_R_SUCCESS);
7874 }
7875
7876 static void
7877 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7878         unsigned char *raw = rdataset->private5;        /* RDATASLAB */
7879 #if DNS_RDATASET_FIXED
7880         unsigned int offset;
7881 #endif
7882         unsigned int length;
7883         isc_region_t r;
7884         unsigned int flags = 0;
7885
7886         REQUIRE(raw != NULL);
7887
7888         /*
7889          * Find the start of the record if not already in private5
7890          * then skip the length and order fields.
7891          */
7892 #if DNS_RDATASET_FIXED
7893         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7894                 offset = (raw[0] << 24) + (raw[1] << 16) +
7895                          (raw[2] << 8) + raw[3];
7896                 raw = rdataset->private3;
7897                 raw += offset;
7898         }
7899 #endif
7900         length = raw[0] * 256 + raw[1];
7901 #if DNS_RDATASET_FIXED
7902         raw += 4;
7903 #else
7904         raw += 2;
7905 #endif
7906         if (rdataset->type == dns_rdatatype_rrsig) {
7907                 if (*raw & DNS_RDATASLAB_OFFLINE)
7908                         flags |= DNS_RDATA_OFFLINE;
7909                 length--;
7910                 raw++;
7911         }
7912         r.length = length;
7913         r.base = raw;
7914         dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7915         rdata->flags |= flags;
7916 }
7917
7918 static void
7919 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7920         dns_db_t *db = source->private1;
7921         dns_dbnode_t *node = source->private2;
7922         dns_dbnode_t *cloned_node = NULL;
7923
7924         attachnode(db, node, &cloned_node);
7925         *target = *source;
7926
7927         /*
7928          * Reset iterator state.
7929          */
7930         target->privateuint4 = 0;
7931         target->private5 = NULL;
7932 }
7933
7934 static unsigned int
7935 rdataset_count(dns_rdataset_t *rdataset) {
7936         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7937         unsigned int count;
7938
7939         count = raw[0] * 256 + raw[1];
7940
7941         return (count);
7942 }
7943
7944 static isc_result_t
7945 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7946                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7947 {
7948         dns_db_t *db = rdataset->private1;
7949         dns_dbnode_t *node = rdataset->private2;
7950         dns_dbnode_t *cloned_node;
7951         struct noqname *noqname = rdataset->private6;
7952
7953         cloned_node = NULL;
7954         attachnode(db, node, &cloned_node);
7955         nsec->methods = &rdataset_methods;
7956         nsec->rdclass = db->rdclass;
7957         nsec->type = noqname->type;
7958         nsec->covers = 0;
7959         nsec->ttl = rdataset->ttl;
7960         nsec->trust = rdataset->trust;
7961         nsec->private1 = rdataset->private1;
7962         nsec->private2 = rdataset->private2;
7963         nsec->private3 = noqname->neg;
7964         nsec->privateuint4 = 0;
7965         nsec->private5 = NULL;
7966         nsec->private6 = NULL;
7967         nsec->private7 = NULL;
7968
7969         cloned_node = NULL;
7970         attachnode(db, node, &cloned_node);
7971         nsecsig->methods = &rdataset_methods;
7972         nsecsig->rdclass = db->rdclass;
7973         nsecsig->type = dns_rdatatype_rrsig;
7974         nsecsig->covers = noqname->type;
7975         nsecsig->ttl = rdataset->ttl;
7976         nsecsig->trust = rdataset->trust;
7977         nsecsig->private1 = rdataset->private1;
7978         nsecsig->private2 = rdataset->private2;
7979         nsecsig->private3 = noqname->negsig;
7980         nsecsig->privateuint4 = 0;
7981         nsecsig->private5 = NULL;
7982         nsec->private6 = NULL;
7983         nsec->private7 = NULL;
7984
7985         dns_name_clone(&noqname->name, name);
7986
7987         return (ISC_R_SUCCESS);
7988 }
7989
7990 static isc_result_t
7991 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
7992                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7993 {
7994         dns_db_t *db = rdataset->private1;
7995         dns_dbnode_t *node = rdataset->private2;
7996         dns_dbnode_t *cloned_node;
7997         struct noqname *closest = rdataset->private7;
7998
7999         cloned_node = NULL;
8000         attachnode(db, node, &cloned_node);
8001         nsec->methods = &rdataset_methods;
8002         nsec->rdclass = db->rdclass;
8003         nsec->type = closest->type;
8004         nsec->covers = 0;
8005         nsec->ttl = rdataset->ttl;
8006         nsec->trust = rdataset->trust;
8007         nsec->private1 = rdataset->private1;
8008         nsec->private2 = rdataset->private2;
8009         nsec->private3 = closest->neg;
8010         nsec->privateuint4 = 0;
8011         nsec->private5 = NULL;
8012         nsec->private6 = NULL;
8013         nsec->private7 = NULL;
8014
8015         cloned_node = NULL;
8016         attachnode(db, node, &cloned_node);
8017         nsecsig->methods = &rdataset_methods;
8018         nsecsig->rdclass = db->rdclass;
8019         nsecsig->type = dns_rdatatype_rrsig;
8020         nsecsig->covers = closest->type;
8021         nsecsig->ttl = rdataset->ttl;
8022         nsecsig->trust = rdataset->trust;
8023         nsecsig->private1 = rdataset->private1;
8024         nsecsig->private2 = rdataset->private2;
8025         nsecsig->private3 = closest->negsig;
8026         nsecsig->privateuint4 = 0;
8027         nsecsig->private5 = NULL;
8028         nsec->private6 = NULL;
8029         nsec->private7 = NULL;
8030
8031         dns_name_clone(&closest->name, name);
8032
8033         return (ISC_R_SUCCESS);
8034 }
8035
8036 static void
8037 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
8038         dns_rbtdb_t *rbtdb = rdataset->private1;
8039         dns_rbtnode_t *rbtnode = rdataset->private2;
8040         rdatasetheader_t *header = rdataset->private3;
8041
8042         header--;
8043         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8044                   isc_rwlocktype_write);
8045         header->trust = rdataset->trust = trust;
8046         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8047                   isc_rwlocktype_write);
8048 }
8049
8050 static void
8051 rdataset_expire(dns_rdataset_t *rdataset) {
8052         dns_rbtdb_t *rbtdb = rdataset->private1;
8053         dns_rbtnode_t *rbtnode = rdataset->private2;
8054         rdatasetheader_t *header = rdataset->private3;
8055
8056         header--;
8057         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8058                   isc_rwlocktype_write);
8059         expire_header(rbtdb, header, ISC_FALSE);
8060         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8061                   isc_rwlocktype_write);
8062 }
8063
8064 /*
8065  * Rdataset Iterator Methods
8066  */
8067
8068 static void
8069 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
8070         rbtdb_rdatasetiter_t *rbtiterator;
8071
8072         rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
8073
8074         if (rbtiterator->common.version != NULL)
8075                 closeversion(rbtiterator->common.db,
8076                              &rbtiterator->common.version, ISC_FALSE);
8077         detachnode(rbtiterator->common.db, &rbtiterator->common.node);
8078         isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
8079                     sizeof(*rbtiterator));
8080
8081         *iteratorp = NULL;
8082 }
8083
8084 static isc_result_t
8085 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
8086         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8087         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8088         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8089         rbtdb_version_t *rbtversion = rbtiterator->common.version;
8090         rdatasetheader_t *header, *top_next;
8091         rbtdb_serial_t serial;
8092         isc_stdtime_t now;
8093
8094         if (IS_CACHE(rbtdb)) {
8095                 serial = 1;
8096                 now = rbtiterator->common.now;
8097         } else {
8098                 serial = rbtversion->serial;
8099                 now = 0;
8100         }
8101
8102         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8103                   isc_rwlocktype_read);
8104
8105         for (header = rbtnode->data; header != NULL; header = top_next) {
8106                 top_next = header->next;
8107                 do {
8108                         if (header->serial <= serial && !IGNORE(header)) {
8109                                 /*
8110                                  * Is this a "this rdataset doesn't exist"
8111                                  * record?  Or is it too old in the cache?
8112                                  *
8113                                  * Note: unlike everywhere else, we
8114                                  * check for now > header->rdh_ttl instead
8115                                  * of now >= header->rdh_ttl.  This allows
8116                                  * ANY and RRSIG queries for 0 TTL
8117                                  * rdatasets to work.
8118                                  */
8119                                 if (NONEXISTENT(header) ||
8120                                     (now != 0 && now > header->rdh_ttl))
8121                                         header = NULL;
8122                                 break;
8123                         } else
8124                                 header = header->down;
8125                 } while (header != NULL);
8126                 if (header != NULL)
8127                         break;
8128         }
8129
8130         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8131                     isc_rwlocktype_read);
8132
8133         rbtiterator->current = header;
8134
8135         if (header == NULL)
8136                 return (ISC_R_NOMORE);
8137
8138         return (ISC_R_SUCCESS);
8139 }
8140
8141 static isc_result_t
8142 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
8143         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8144         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8145         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8146         rbtdb_version_t *rbtversion = rbtiterator->common.version;
8147         rdatasetheader_t *header, *top_next;
8148         rbtdb_serial_t serial;
8149         isc_stdtime_t now;
8150         rbtdb_rdatatype_t type, negtype;
8151         dns_rdatatype_t rdtype, covers;
8152
8153         header = rbtiterator->current;
8154         if (header == NULL)
8155                 return (ISC_R_NOMORE);
8156
8157         if (IS_CACHE(rbtdb)) {
8158                 serial = 1;
8159                 now = rbtiterator->common.now;
8160         } else {
8161                 serial = rbtversion->serial;
8162                 now = 0;
8163         }
8164
8165         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8166                   isc_rwlocktype_read);
8167
8168         type = header->type;
8169         rdtype = RBTDB_RDATATYPE_BASE(header->type);
8170         if (NEGATIVE(header)) {
8171                 covers = RBTDB_RDATATYPE_EXT(header->type);
8172                 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
8173         } else
8174                 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
8175         for (header = header->next; header != NULL; header = top_next) {
8176                 top_next = header->next;
8177                 /*
8178                  * If not walking back up the down list.
8179                  */
8180                 if (header->type != type && header->type != negtype) {
8181                         do {
8182                                 if (header->serial <= serial &&
8183                                     !IGNORE(header)) {
8184                                         /*
8185                                          * Is this a "this rdataset doesn't
8186                                          * exist" record?
8187                                          *
8188                                          * Note: unlike everywhere else, we
8189                                          * check for now > header->ttl instead
8190                                          * of now >= header->ttl.  This allows
8191                                          * ANY and RRSIG queries for 0 TTL
8192                                          * rdatasets to work.
8193                                          */
8194                                         if ((header->attributes &
8195                                              RDATASET_ATTR_NONEXISTENT) != 0 ||
8196                                             (now != 0 && now > header->rdh_ttl))
8197                                                 header = NULL;
8198                                         break;
8199                                 } else
8200                                         header = header->down;
8201                         } while (header != NULL);
8202                         if (header != NULL)
8203                                 break;
8204                 }
8205         }
8206
8207         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8208                     isc_rwlocktype_read);
8209
8210         rbtiterator->current = header;
8211
8212         if (header == NULL)
8213                 return (ISC_R_NOMORE);
8214
8215         return (ISC_R_SUCCESS);
8216 }
8217
8218 static void
8219 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
8220         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8221         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8222         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8223         rdatasetheader_t *header;
8224
8225         header = rbtiterator->current;
8226         REQUIRE(header != NULL);
8227
8228         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8229                   isc_rwlocktype_read);
8230
8231         bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
8232                       rdataset);
8233
8234         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8235                     isc_rwlocktype_read);
8236 }
8237
8238
8239 /*
8240  * Database Iterator Methods
8241  */
8242
8243 static inline void
8244 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
8245         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8246         dns_rbtnode_t *node = rbtdbiter->node;
8247
8248         if (node == NULL)
8249                 return;
8250
8251         INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
8252         reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
8253 }
8254
8255 static inline void
8256 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
8257         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8258         dns_rbtnode_t *node = rbtdbiter->node;
8259         nodelock_t *lock;
8260
8261         if (node == NULL)
8262                 return;
8263
8264         lock = &rbtdb->node_locks[node->locknum].lock;
8265         NODE_LOCK(lock, isc_rwlocktype_read);
8266         decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
8267                             rbtdbiter->tree_locked, ISC_FALSE);
8268         NODE_UNLOCK(lock, isc_rwlocktype_read);
8269
8270         rbtdbiter->node = NULL;
8271 }
8272
8273 static void
8274 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
8275         dns_rbtnode_t *node;
8276         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8277         isc_boolean_t was_read_locked = ISC_FALSE;
8278         nodelock_t *lock;
8279         int i;
8280
8281         if (rbtdbiter->delete != 0) {
8282                 /*
8283                  * Note that "%d node of %d in tree" can report things like
8284                  * "flush_deletions: 59 nodes of 41 in tree".  This means
8285                  * That some nodes appear on the deletions list more than
8286                  * once.  Only the last occurence will actually be deleted.
8287                  */
8288                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
8289                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
8290                               "flush_deletions: %d nodes of %d in tree",
8291                               rbtdbiter->delete,
8292                               dns_rbt_nodecount(rbtdb->tree));
8293
8294                 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
8295                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8296                         was_read_locked = ISC_TRUE;
8297                 }
8298                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8299                 rbtdbiter->tree_locked = isc_rwlocktype_write;
8300
8301                 for (i = 0; i < rbtdbiter->delete; i++) {
8302                         node = rbtdbiter->deletions[i];
8303                         lock = &rbtdb->node_locks[node->locknum].lock;
8304
8305                         NODE_LOCK(lock, isc_rwlocktype_read);
8306                         decrement_reference(rbtdb, node, 0,
8307                                             isc_rwlocktype_read,
8308                                             rbtdbiter->tree_locked, ISC_FALSE);
8309                         NODE_UNLOCK(lock, isc_rwlocktype_read);
8310                 }
8311
8312                 rbtdbiter->delete = 0;
8313
8314                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8315                 if (was_read_locked) {
8316                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8317                         rbtdbiter->tree_locked = isc_rwlocktype_read;
8318
8319                 } else {
8320                         rbtdbiter->tree_locked = isc_rwlocktype_none;
8321                 }
8322         }
8323 }
8324
8325 static inline void
8326 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
8327         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8328
8329         REQUIRE(rbtdbiter->paused);
8330         REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
8331
8332         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8333         rbtdbiter->tree_locked = isc_rwlocktype_read;
8334
8335         rbtdbiter->paused = ISC_FALSE;
8336 }
8337
8338 static void
8339 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
8340         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
8341         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8342         dns_db_t *db = NULL;
8343
8344         if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
8345                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8346                 rbtdbiter->tree_locked = isc_rwlocktype_none;
8347         } else
8348                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
8349
8350         dereference_iter_node(rbtdbiter);
8351
8352         flush_deletions(rbtdbiter);
8353
8354         dns_db_attach(rbtdbiter->common.db, &db);
8355         dns_db_detach(&rbtdbiter->common.db);
8356
8357         dns_rbtnodechain_reset(&rbtdbiter->chain);
8358         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8359         isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
8360         dns_db_detach(&db);
8361
8362         *iteratorp = NULL;
8363 }
8364
8365 static isc_result_t
8366 dbiterator_first(dns_dbiterator_t *iterator) {
8367         isc_result_t result;
8368         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8369         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8370         dns_name_t *name, *origin;
8371
8372         if (rbtdbiter->result != ISC_R_SUCCESS &&
8373             rbtdbiter->result != ISC_R_NOMORE)
8374                 return (rbtdbiter->result);
8375
8376         if (rbtdbiter->paused)
8377                 resume_iteration(rbtdbiter);
8378
8379         dereference_iter_node(rbtdbiter);
8380
8381         name = dns_fixedname_name(&rbtdbiter->name);
8382         origin = dns_fixedname_name(&rbtdbiter->origin);
8383         dns_rbtnodechain_reset(&rbtdbiter->chain);
8384         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8385
8386         if (rbtdbiter->nsec3only) {
8387                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8388                 result = dns_rbtnodechain_first(rbtdbiter->current,
8389                                                 rbtdb->nsec3, name, origin);
8390         } else {
8391                 rbtdbiter->current = &rbtdbiter->chain;
8392                 result = dns_rbtnodechain_first(rbtdbiter->current,
8393                                                 rbtdb->tree, name, origin);
8394                 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
8395                         rbtdbiter->current = &rbtdbiter->nsec3chain;
8396                         result = dns_rbtnodechain_first(rbtdbiter->current,
8397                                                         rbtdb->nsec3, name,
8398                                                         origin);
8399                 }
8400         }
8401         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
8402                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8403                                                   NULL, &rbtdbiter->node);
8404                 if (result == ISC_R_SUCCESS) {
8405                         rbtdbiter->new_origin = ISC_TRUE;
8406                         reference_iter_node(rbtdbiter);
8407                 }
8408         } else {
8409                 INSIST(result == ISC_R_NOTFOUND);
8410                 result = ISC_R_NOMORE; /* The tree is empty. */
8411         }
8412
8413         rbtdbiter->result = result;
8414
8415         return (result);
8416 }
8417
8418 static isc_result_t
8419 dbiterator_last(dns_dbiterator_t *iterator) {
8420         isc_result_t result;
8421         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8422         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8423         dns_name_t *name, *origin;
8424
8425         if (rbtdbiter->result != ISC_R_SUCCESS &&
8426             rbtdbiter->result != ISC_R_NOMORE)
8427                 return (rbtdbiter->result);
8428
8429         if (rbtdbiter->paused)
8430                 resume_iteration(rbtdbiter);
8431
8432         dereference_iter_node(rbtdbiter);
8433
8434         name = dns_fixedname_name(&rbtdbiter->name);
8435         origin = dns_fixedname_name(&rbtdbiter->origin);
8436         dns_rbtnodechain_reset(&rbtdbiter->chain);
8437         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8438
8439         result = ISC_R_NOTFOUND;
8440         if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
8441                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8442                 result = dns_rbtnodechain_last(rbtdbiter->current,
8443                                                rbtdb->nsec3, name, origin);
8444         }
8445         if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
8446                 rbtdbiter->current = &rbtdbiter->chain;
8447                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8448                                                name, origin);
8449         }
8450         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
8451                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8452                                                   NULL, &rbtdbiter->node);
8453                 if (result == ISC_R_SUCCESS) {
8454                         rbtdbiter->new_origin = ISC_TRUE;
8455                         reference_iter_node(rbtdbiter);
8456                 }
8457         } else {
8458                 INSIST(result == ISC_R_NOTFOUND);
8459                 result = ISC_R_NOMORE; /* The tree is empty. */
8460         }
8461
8462         rbtdbiter->result = result;
8463
8464         return (result);
8465 }
8466
8467 static isc_result_t
8468 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
8469         isc_result_t result;
8470         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8471         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8472         dns_name_t *iname, *origin;
8473
8474         if (rbtdbiter->result != ISC_R_SUCCESS &&
8475             rbtdbiter->result != ISC_R_NOTFOUND &&
8476             rbtdbiter->result != ISC_R_NOMORE)
8477                 return (rbtdbiter->result);
8478
8479         if (rbtdbiter->paused)
8480                 resume_iteration(rbtdbiter);
8481
8482         dereference_iter_node(rbtdbiter);
8483
8484         iname = dns_fixedname_name(&rbtdbiter->name);
8485         origin = dns_fixedname_name(&rbtdbiter->origin);
8486         dns_rbtnodechain_reset(&rbtdbiter->chain);
8487         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8488
8489         if (rbtdbiter->nsec3only) {
8490                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8491                 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8492                                           &rbtdbiter->node,
8493                                           rbtdbiter->current,
8494                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8495         } else if (rbtdbiter->nonsec3) {
8496                 rbtdbiter->current = &rbtdbiter->chain;
8497                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8498                                           &rbtdbiter->node,
8499                                           rbtdbiter->current,
8500                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8501         } else {
8502                 /*
8503                  * Stay on main chain if not found on either chain.
8504                  */
8505                 rbtdbiter->current = &rbtdbiter->chain;
8506                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8507                                           &rbtdbiter->node,
8508                                           rbtdbiter->current,
8509                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8510                 if (result == DNS_R_PARTIALMATCH) {
8511                         dns_rbtnode_t *node = NULL;
8512                         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8513                                                   &node, &rbtdbiter->nsec3chain,
8514                                                   DNS_RBTFIND_EMPTYDATA,
8515                                                   NULL, NULL);
8516                         if (result == ISC_R_SUCCESS) {
8517                                 rbtdbiter->node = node;
8518                                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8519                         }
8520                 }
8521         }
8522
8523 #if 1
8524         if (result == ISC_R_SUCCESS) {
8525                 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
8526                                                   origin, NULL);
8527                 if (result == ISC_R_SUCCESS) {
8528                         rbtdbiter->new_origin = ISC_TRUE;
8529                         reference_iter_node(rbtdbiter);
8530                 }
8531         } else if (result == DNS_R_PARTIALMATCH) {
8532                 result = ISC_R_NOTFOUND;
8533                 rbtdbiter->node = NULL;
8534         }
8535
8536         rbtdbiter->result = result;
8537 #else
8538         if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
8539                 isc_result_t tresult;
8540                 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
8541                                                    origin, NULL);
8542                 if (tresult == ISC_R_SUCCESS) {
8543                         rbtdbiter->new_origin = ISC_TRUE;
8544                         reference_iter_node(rbtdbiter);
8545                 } else {
8546                         result = tresult;
8547                         rbtdbiter->node = NULL;
8548                 }
8549         } else
8550                 rbtdbiter->node = NULL;
8551
8552         rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
8553                             ISC_R_SUCCESS : result;
8554 #endif
8555
8556         return (result);
8557 }
8558
8559 static isc_result_t
8560 dbiterator_prev(dns_dbiterator_t *iterator) {
8561         isc_result_t result;
8562         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8563         dns_name_t *name, *origin;
8564         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8565
8566         REQUIRE(rbtdbiter->node != NULL);
8567
8568         if (rbtdbiter->result != ISC_R_SUCCESS)
8569                 return (rbtdbiter->result);
8570
8571         if (rbtdbiter->paused)
8572                 resume_iteration(rbtdbiter);
8573
8574         name = dns_fixedname_name(&rbtdbiter->name);
8575         origin = dns_fixedname_name(&rbtdbiter->origin);
8576         result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
8577         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8578             !rbtdbiter->nonsec3 &&
8579             &rbtdbiter->nsec3chain == rbtdbiter->current) {
8580                 rbtdbiter->current = &rbtdbiter->chain;
8581                 dns_rbtnodechain_reset(rbtdbiter->current);
8582                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8583                                                name, origin);
8584                 if (result == ISC_R_NOTFOUND)
8585                         result = ISC_R_NOMORE;
8586         }
8587
8588         dereference_iter_node(rbtdbiter);
8589
8590         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8591                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8592                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8593                                                   NULL, &rbtdbiter->node);
8594         }
8595
8596         if (result == ISC_R_SUCCESS)
8597                 reference_iter_node(rbtdbiter);
8598
8599         rbtdbiter->result = result;
8600
8601         return (result);
8602 }
8603
8604 static isc_result_t
8605 dbiterator_next(dns_dbiterator_t *iterator) {
8606         isc_result_t result;
8607         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8608         dns_name_t *name, *origin;
8609         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8610
8611         REQUIRE(rbtdbiter->node != NULL);
8612
8613         if (rbtdbiter->result != ISC_R_SUCCESS)
8614                 return (rbtdbiter->result);
8615
8616         if (rbtdbiter->paused)
8617                 resume_iteration(rbtdbiter);
8618
8619         name = dns_fixedname_name(&rbtdbiter->name);
8620         origin = dns_fixedname_name(&rbtdbiter->origin);
8621         result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
8622         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8623             !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
8624                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8625                 dns_rbtnodechain_reset(rbtdbiter->current);
8626                 result = dns_rbtnodechain_first(rbtdbiter->current,
8627                                                 rbtdb->nsec3, name, origin);
8628                 if (result == ISC_R_NOTFOUND)
8629                         result = ISC_R_NOMORE;
8630         }
8631
8632         dereference_iter_node(rbtdbiter);
8633
8634         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8635                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8636                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8637                                                   NULL, &rbtdbiter->node);
8638         }
8639         if (result == ISC_R_SUCCESS)
8640                 reference_iter_node(rbtdbiter);
8641
8642         rbtdbiter->result = result;
8643
8644         return (result);
8645 }
8646
8647 static isc_result_t
8648 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
8649                    dns_name_t *name)
8650 {
8651         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8652         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8653         dns_rbtnode_t *node = rbtdbiter->node;
8654         isc_result_t result;
8655         dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8656         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8657
8658         REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8659         REQUIRE(rbtdbiter->node != NULL);
8660
8661         if (rbtdbiter->paused)
8662                 resume_iteration(rbtdbiter);
8663
8664         if (name != NULL) {
8665                 if (rbtdbiter->common.relative_names)
8666                         origin = NULL;
8667                 result = dns_name_concatenate(nodename, origin, name, NULL);
8668                 if (result != ISC_R_SUCCESS)
8669                         return (result);
8670                 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8671                         result = DNS_R_NEWORIGIN;
8672         } else
8673                 result = ISC_R_SUCCESS;
8674
8675         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8676         new_reference(rbtdb, node);
8677         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8678
8679         *nodep = rbtdbiter->node;
8680
8681         if (iterator->cleaning && result == ISC_R_SUCCESS) {
8682                 isc_result_t expire_result;
8683
8684                 /*
8685                  * If the deletion array is full, flush it before trying
8686                  * to expire the current node.  The current node can't
8687                  * fully deleted while the iteration cursor is still on it.
8688                  */
8689                 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8690                         flush_deletions(rbtdbiter);
8691
8692                 expire_result = expirenode(iterator->db, *nodep, 0);
8693
8694                 /*
8695                  * expirenode() currently always returns success.
8696                  */
8697                 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8698                         unsigned int refs;
8699
8700                         rbtdbiter->deletions[rbtdbiter->delete++] = node;
8701                         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8702                         dns_rbtnode_refincrement(node, &refs);
8703                         INSIST(refs != 0);
8704                         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8705                 }
8706         }
8707
8708         return (result);
8709 }
8710
8711 static isc_result_t
8712 dbiterator_pause(dns_dbiterator_t *iterator) {
8713         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8714         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8715
8716         if (rbtdbiter->result != ISC_R_SUCCESS &&
8717             rbtdbiter->result != ISC_R_NOMORE)
8718                 return (rbtdbiter->result);
8719
8720         if (rbtdbiter->paused)
8721                 return (ISC_R_SUCCESS);
8722
8723         rbtdbiter->paused = ISC_TRUE;
8724
8725         if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8726                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8727                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8728                 rbtdbiter->tree_locked = isc_rwlocktype_none;
8729         }
8730
8731         flush_deletions(rbtdbiter);
8732
8733         return (ISC_R_SUCCESS);
8734 }
8735
8736 static isc_result_t
8737 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8738         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8739         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8740
8741         if (rbtdbiter->result != ISC_R_SUCCESS)
8742                 return (rbtdbiter->result);
8743
8744         return (dns_name_copy(origin, name, NULL));
8745 }
8746
8747 /*%
8748  * Additional cache routines.
8749  */
8750 static isc_result_t
8751 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8752                        dns_rdatatype_t qtype, dns_acache_t *acache,
8753                        dns_zone_t **zonep, dns_db_t **dbp,
8754                        dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8755                        dns_name_t *fname, dns_message_t *msg,
8756                        isc_stdtime_t now)
8757 {
8758 #ifndef BIND9
8759         UNUSED(rdataset);
8760         UNUSED(type);
8761         UNUSED(qtype);
8762         UNUSED(acache);
8763         UNUSED(zonep);
8764         UNUSED(dbp);
8765         UNUSED(versionp);
8766         UNUSED(nodep);
8767         UNUSED(fname);
8768         UNUSED(msg);
8769         UNUSED(now);
8770
8771         return (ISC_R_NOTIMPLEMENTED);
8772 #else
8773         dns_rbtdb_t *rbtdb = rdataset->private1;
8774         dns_rbtnode_t *rbtnode = rdataset->private2;
8775         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8776         unsigned int current_count = rdataset->privateuint4;
8777         unsigned int count;
8778         rdatasetheader_t *header;
8779         nodelock_t *nodelock;
8780         unsigned int total_count;
8781         acachectl_t *acarray;
8782         dns_acacheentry_t *entry;
8783         isc_result_t result;
8784
8785         UNUSED(qtype); /* we do not use this value at least for now */
8786         UNUSED(acache);
8787
8788         header = (struct rdatasetheader *)(raw - sizeof(*header));
8789
8790         total_count = raw[0] * 256 + raw[1];
8791         INSIST(total_count > current_count);
8792         count = total_count - current_count - 1;
8793
8794         acarray = NULL;
8795
8796         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8797         NODE_LOCK(nodelock, isc_rwlocktype_read);
8798
8799         switch (type) {
8800         case dns_rdatasetadditional_fromauth:
8801                 acarray = header->additional_auth;
8802                 break;
8803         case dns_rdatasetadditional_fromcache:
8804                 acarray = NULL;
8805                 break;
8806         case dns_rdatasetadditional_fromglue:
8807                 acarray = header->additional_glue;
8808                 break;
8809         default:
8810                 INSIST(0);
8811         }
8812
8813         if (acarray == NULL) {
8814                 if (type != dns_rdatasetadditional_fromcache)
8815                         dns_acache_countquerymiss(acache);
8816                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8817                 return (ISC_R_NOTFOUND);
8818         }
8819
8820         if (acarray[count].entry == NULL) {
8821                 dns_acache_countquerymiss(acache);
8822                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8823                 return (ISC_R_NOTFOUND);
8824         }
8825
8826         entry = NULL;
8827         dns_acache_attachentry(acarray[count].entry, &entry);
8828
8829         NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8830
8831         result = dns_acache_getentry(entry, zonep, dbp, versionp,
8832                                      nodep, fname, msg, now);
8833
8834         dns_acache_detachentry(&entry);
8835
8836         return (result);
8837 }
8838
8839 static void
8840 acache_callback(dns_acacheentry_t *entry, void **arg) {
8841         dns_rbtdb_t *rbtdb;
8842         dns_rbtnode_t *rbtnode;
8843         nodelock_t *nodelock;
8844         acachectl_t *acarray = NULL;
8845         acache_cbarg_t *cbarg;
8846         unsigned int count;
8847
8848         REQUIRE(arg != NULL);
8849         cbarg = *arg;
8850
8851         /*
8852          * The caller must hold the entry lock.
8853          */
8854
8855         rbtdb = (dns_rbtdb_t *)cbarg->db;
8856         rbtnode = (dns_rbtnode_t *)cbarg->node;
8857
8858         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8859         NODE_LOCK(nodelock, isc_rwlocktype_write);
8860
8861         switch (cbarg->type) {
8862         case dns_rdatasetadditional_fromauth:
8863                 acarray = cbarg->header->additional_auth;
8864                 break;
8865         case dns_rdatasetadditional_fromglue:
8866                 acarray = cbarg->header->additional_glue;
8867                 break;
8868         default:
8869                 INSIST(0);
8870         }
8871
8872         count = cbarg->count;
8873         if (acarray != NULL && acarray[count].entry == entry) {
8874                 acarray[count].entry = NULL;
8875                 INSIST(acarray[count].cbarg == cbarg);
8876                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8877                 acarray[count].cbarg = NULL;
8878         } else
8879                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8880
8881         dns_acache_detachentry(&entry);
8882
8883         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8884
8885         dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8886         dns_db_detach((dns_db_t **)(void*)&rbtdb);
8887
8888         *arg = NULL;
8889 #endif /* BIND9 */
8890 }
8891
#ifdef BIND9
/*
 * Cancel 'entry' in the acache and dispose of its callback argument:
 * the node and database references held by '*cbargp' are released, the
 * structure is returned to 'mctx', and '*cbargp' is set to NULL.
 *
 * The caller remains responsible for detaching 'entry'.
 */
static void
acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
                      acache_cbarg_t **cbargp)
{
        acache_cbarg_t *victim;

        REQUIRE(mctx != NULL);
        REQUIRE(entry != NULL);
        REQUIRE(cbargp != NULL && *cbargp != NULL);

        victim = *cbargp;

        dns_acache_cancelentry(entry);

        /* Drop the references the callback argument was holding. */
        dns_db_detachnode(victim->db, &victim->node);
        dns_db_detach(&victim->db);

        isc_mem_put(mctx, victim, sizeof(*victim));

        *cbargp = NULL;
}
#endif /* BIND9 */
8914
8915 static isc_result_t
8916 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8917                        dns_rdatatype_t qtype, dns_acache_t *acache,
8918                        dns_zone_t *zone, dns_db_t *db,
8919                        dns_dbversion_t *version, dns_dbnode_t *node,
8920                        dns_name_t *fname)
8921 {
8922 #ifndef BIND9
8923         UNUSED(rdataset);
8924         UNUSED(type);
8925         UNUSED(qtype);
8926         UNUSED(acache);
8927         UNUSED(zone);
8928         UNUSED(db);
8929         UNUSED(version);
8930         UNUSED(node);
8931         UNUSED(fname);
8932
8933         return (ISC_R_NOTIMPLEMENTED);
8934 #else
8935         dns_rbtdb_t *rbtdb = rdataset->private1;
8936         dns_rbtnode_t *rbtnode = rdataset->private2;
8937         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8938         unsigned int current_count = rdataset->privateuint4;
8939         rdatasetheader_t *header;
8940         unsigned int total_count, count;
8941         nodelock_t *nodelock;
8942         isc_result_t result;
8943         acachectl_t *acarray;
8944         dns_acacheentry_t *newentry, *oldentry = NULL;
8945         acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8946
8947         UNUSED(qtype);
8948
8949         if (type == dns_rdatasetadditional_fromcache)
8950                 return (ISC_R_SUCCESS);
8951
8952         header = (struct rdatasetheader *)(raw - sizeof(*header));
8953
8954         total_count = raw[0] * 256 + raw[1];
8955         INSIST(total_count > current_count);
8956         count = total_count - current_count - 1; /* should be private data */
8957
8958         newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8959         if (newcbarg == NULL)
8960                 return (ISC_R_NOMEMORY);
8961         newcbarg->type = type;
8962         newcbarg->count = count;
8963         newcbarg->header = header;
8964         newcbarg->db = NULL;
8965         dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8966         newcbarg->node = NULL;
8967         dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8968                           &newcbarg->node);
8969         newentry = NULL;
8970         result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8971                                         acache_callback, newcbarg, &newentry);
8972         if (result != ISC_R_SUCCESS)
8973                 goto fail;
8974         /* Set cache data in the new entry. */
8975         result = dns_acache_setentry(acache, newentry, zone, db,
8976                                      version, node, fname);
8977         if (result != ISC_R_SUCCESS)
8978                 goto fail;
8979
8980         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8981         NODE_LOCK(nodelock, isc_rwlocktype_write);
8982
8983         acarray = NULL;
8984         switch (type) {
8985         case dns_rdatasetadditional_fromauth:
8986                 acarray = header->additional_auth;
8987                 break;
8988         case dns_rdatasetadditional_fromglue:
8989                 acarray = header->additional_glue;
8990                 break;
8991         default:
8992                 INSIST(0);
8993         }
8994
8995         if (acarray == NULL) {
8996                 unsigned int i;
8997
8998                 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
8999                                       sizeof(acachectl_t));
9000
9001                 if (acarray == NULL) {
9002                         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9003                         goto fail;
9004                 }
9005
9006                 for (i = 0; i < total_count; i++) {
9007                         acarray[i].entry = NULL;
9008                         acarray[i].cbarg = NULL;
9009                 }
9010         }
9011         switch (type) {
9012         case dns_rdatasetadditional_fromauth:
9013                 header->additional_auth = acarray;
9014                 break;
9015         case dns_rdatasetadditional_fromglue:
9016                 header->additional_glue = acarray;
9017                 break;
9018         default:
9019                 INSIST(0);
9020         }
9021
9022         if (acarray[count].entry != NULL) {
9023                 /*
9024                  * Swap the entry.  Delay cleaning-up the old entry since
9025                  * it would require a node lock.
9026                  */
9027                 oldentry = acarray[count].entry;
9028                 INSIST(acarray[count].cbarg != NULL);
9029                 oldcbarg = acarray[count].cbarg;
9030         }
9031         acarray[count].entry = newentry;
9032         acarray[count].cbarg = newcbarg;
9033
9034         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9035
9036         if (oldentry != NULL) {
9037                 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
9038                 dns_acache_detachentry(&oldentry);
9039         }
9040
9041         return (ISC_R_SUCCESS);
9042
9043  fail:
9044         if (newcbarg != NULL) {
9045                 if (newentry != NULL) {
9046                         acache_cancelentry(rbtdb->common.mctx, newentry,
9047                                            &newcbarg);
9048                         dns_acache_detachentry(&newentry);
9049                 } else {
9050                         dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
9051                         dns_db_detach(&newcbarg->db);
9052                         isc_mem_put(rbtdb->common.mctx, newcbarg,
9053                             sizeof(*newcbarg));
9054                 }
9055         }
9056
9057         return (result);
9058 #endif
9059 }
9060
9061 static isc_result_t
9062 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
9063                        dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
9064 {
9065 #ifndef BIND9
9066         UNUSED(acache);
9067         UNUSED(rdataset);
9068         UNUSED(type);
9069         UNUSED(qtype);
9070
9071         return (ISC_R_NOTIMPLEMENTED);
9072 #else
9073         dns_rbtdb_t *rbtdb = rdataset->private1;
9074         dns_rbtnode_t *rbtnode = rdataset->private2;
9075         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
9076         unsigned int current_count = rdataset->privateuint4;
9077         rdatasetheader_t *header;
9078         nodelock_t *nodelock;
9079         unsigned int total_count, count;
9080         acachectl_t *acarray;
9081         dns_acacheentry_t *entry;
9082         acache_cbarg_t *cbarg;
9083
9084         UNUSED(qtype);          /* we do not use this value at least for now */
9085         UNUSED(acache);
9086
9087         if (type == dns_rdatasetadditional_fromcache)
9088                 return (ISC_R_SUCCESS);
9089
9090         header = (struct rdatasetheader *)(raw - sizeof(*header));
9091
9092         total_count = raw[0] * 256 + raw[1];
9093         INSIST(total_count > current_count);
9094         count = total_count - current_count - 1;
9095
9096         acarray = NULL;
9097         entry = NULL;
9098
9099         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
9100         NODE_LOCK(nodelock, isc_rwlocktype_write);
9101
9102         switch (type) {
9103         case dns_rdatasetadditional_fromauth:
9104                 acarray = header->additional_auth;
9105                 break;
9106         case dns_rdatasetadditional_fromglue:
9107                 acarray = header->additional_glue;
9108                 break;
9109         default:
9110                 INSIST(0);
9111         }
9112
9113         if (acarray == NULL) {
9114                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9115                 return (ISC_R_NOTFOUND);
9116         }
9117
9118         entry = acarray[count].entry;
9119         if (entry == NULL) {
9120                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9121                 return (ISC_R_NOTFOUND);
9122         }
9123
9124         acarray[count].entry = NULL;
9125         cbarg = acarray[count].cbarg;
9126         acarray[count].cbarg = NULL;
9127
9128         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9129
9130         if (entry != NULL) {
9131                 if (cbarg != NULL)
9132                         acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
9133                 dns_acache_detachentry(&entry);
9134         }
9135
9136         return (ISC_R_SUCCESS);
9137 #endif
9138 }
9139
9140 /*%
9141  * Routines for LRU-based cache management.
9142  */
9143
9144 /*%
9145  * See if a given cache entry that is being reused needs to be updated
9146  * in the LRU-list.  From the LRU management point of view, this function is
9147  * expected to return true for almost all cases.  When used with threads,
9148  * however, this may cause a non-negligible performance penalty because a
9149  * writer lock will have to be acquired before updating the list.
9150  * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
9151  * function returns true if the entry has not been updated for some period of
9152  * time.  We differentiate the NS or glue address case and the others since
9153  * experiments have shown that the former tends to be accessed relatively
9154  * infrequently and the cost of cache miss is higher (e.g., a missing NS records
9155  * may cause external queries at a higher level zone, involving more
9156  * transactions).
9157  *
9158  * Caller must hold the node (read or write) lock.
9159  */
9160 static inline isc_boolean_t
9161 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
9162         if ((header->attributes &
9163              (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
9164                 return (ISC_FALSE);
9165
9166 #if DNS_RBTDB_LIMITLRUUPDATE
9167         if (header->type == dns_rdatatype_ns ||
9168             (header->trust == dns_trust_glue &&
9169              (header->type == dns_rdatatype_a ||
9170               header->type == dns_rdatatype_aaaa))) {
9171                 /*
9172                  * Glue records are updated if at least 60 seconds have passed
9173                  * since the previous update time.
9174                  */
9175                 return (header->last_used + 60 <= now);
9176         }
9177
9178         /* Other records are updated if 5 minutes have passed. */
9179         return (header->last_used + 300 <= now);
9180 #else
9181         UNUSED(now);
9182
9183         return (ISC_TRUE);
9184 #endif
9185 }
9186
9187 /*%
9188  * Update the timestamp of a given cache entry and move it to the head
9189  * of the corresponding LRU list.
9190  *
9191  * Caller must hold the node (write) lock.
9192  *
9193  * Note that the we do NOT touch the heap here, as the TTL has not changed.
9194  */
9195 static void
9196 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
9197               isc_stdtime_t now)
9198 {
9199         INSIST(IS_CACHE(rbtdb));
9200
9201         /* To be checked: can we really assume this? XXXMLG */
9202         INSIST(ISC_LINK_LINKED(header, link));
9203
9204         ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
9205         header->last_used = now;
9206         ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
9207 }
9208
9209 /*%
9210  * Purge some expired and/or stale (i.e. unused for some period) cache entries
9211  * under an overmem condition.  To recover from this condition quickly, up to
9212  * 2 entries will be purged.  This process is triggered while adding a new
9213  * entry, and we specifically avoid purging entries in the same LRU bucket as
9214  * the one to which the new entry will belong.  Otherwise, we might purge
9215  * entries of the same name of different RR types while adding RRsets from a
9216  * single response (consider the case where we're adding A and AAAA glue records
9217  * of the same NS name).
9218  */
9219 static void
9220 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
9221               isc_stdtime_t now, isc_boolean_t tree_locked)
9222 {
9223         rdatasetheader_t *header, *header_prev;
9224         unsigned int locknum;
9225         int purgecount = 2;
9226
9227         for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
9228              locknum != locknum_start && purgecount > 0;
9229              locknum = (locknum + 1) % rbtdb->node_lock_count) {
9230                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
9231                           isc_rwlocktype_write);
9232
9233                 header = isc_heap_element(rbtdb->heaps[locknum], 1);
9234                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
9235                         expire_header(rbtdb, header, tree_locked);
9236                         purgecount--;
9237                 }
9238
9239                 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
9240                      header != NULL && purgecount > 0;
9241                      header = header_prev) {
9242                         header_prev = ISC_LIST_PREV(header, link);
9243                         /*
9244                          * Unlink the entry at this point to avoid checking it
9245                          * again even if it's currently used someone else and
9246                          * cannot be purged at this moment.  This entry won't be
9247                          * referenced any more (so unlinking is safe) since the
9248                          * TTL was reset to 0.
9249                          */
9250                         ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
9251                                         link);
9252                         expire_header(rbtdb, header, tree_locked);
9253                         purgecount--;
9254                 }
9255
9256                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
9257                                     isc_rwlocktype_write);
9258         }
9259 }
9260
9261 static void
9262 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
9263               isc_boolean_t tree_locked)
9264 {
9265         set_ttl(rbtdb, header, 0);
9266         header->attributes |= RDATASET_ATTR_STALE;
9267         header->node->dirty = 1;
9268
9269         /*
9270          * Caller must hold the node (write) lock.
9271          */
9272
9273         if (dns_rbtnode_refcurrent(header->node) == 0) {
9274                 /*
9275                  * If no one else is using the node, we can clean it up now.
9276                  * We first need to gain a new reference to the node to meet a
9277                  * requirement of decrement_reference().
9278                  */
9279                 new_reference(rbtdb, header->node);
9280                 decrement_reference(rbtdb, header->node, 0,
9281                                     isc_rwlocktype_write,
9282                                     tree_locked ? isc_rwlocktype_write :
9283                                     isc_rwlocktype_none, ISC_FALSE);
9284         }
9285 }