]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/bind9/lib/dns/rbtdb.c
This commit was generated by cvs2svn to compensate for changes in r171945,
[FreeBSD/FreeBSD.git] / contrib / bind9 / lib / dns / rbtdb.c
1 /*
2  * Copyright (C) 2004-2006  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id: rbtdb.c,v 1.196.18.41 2006/10/26 06:04:29 marka Exp $ */
19
20 /*! \file */
21
22 /*
23  * Principal Author: Bob Halley
24  */
25
26 #include <config.h>
27
28 #include <isc/event.h>
29 #include <isc/mem.h>
30 #include <isc/print.h>
31 #include <isc/mutex.h>
32 #include <isc/random.h>
33 #include <isc/refcount.h>
34 #include <isc/rwlock.h>
35 #include <isc/string.h>
36 #include <isc/task.h>
37 #include <isc/time.h>
38 #include <isc/util.h>
39
40 #include <dns/acache.h>
41 #include <dns/db.h>
42 #include <dns/dbiterator.h>
43 #include <dns/events.h>
44 #include <dns/fixedname.h>
45 #include <dns/lib.h>
46 #include <dns/log.h>
47 #include <dns/masterdump.h>
48 #include <dns/rbt.h>
49 #include <dns/rdata.h>
50 #include <dns/rdataset.h>
51 #include <dns/rdatasetiter.h>
52 #include <dns/rdataslab.h>
53 #include <dns/result.h>
54 #include <dns/view.h>
55 #include <dns/zone.h>
56 #include <dns/zonekey.h>
57
58 #ifdef DNS_RBTDB_VERSION64
59 #include "rbtdb64.h"
60 #else
61 #include "rbtdb.h"
62 #endif
63
/*
 * Implementation magic number.  A different value is selected when
 * DNS_RBTDB_VERSION64 is defined.
 */
#ifndef DNS_RBTDB_VERSION64
#define RBTDB_MAGIC			ISC_MAGIC('R', 'B', 'D', '4')
#else
#define RBTDB_MAGIC			ISC_MAGIC('R', 'B', 'D', '8')
#endif

/*%
 * "impmagic" does not occupy the first four bytes of the structure, which
 * is why ISC_MAGIC_VALID cannot be used for this check.
 */
#define VALID_RBTDB(rbtdb) \
	((rbtdb) != NULL && (rbtdb)->common.impmagic == RBTDB_MAGIC)
76
77 #ifdef DNS_RBTDB_VERSION64
78 typedef isc_uint64_t                    rbtdb_serial_t;
79 /*%
80  * Make casting easier in symbolic debuggers by using different names
81  * for the 64 bit version.
82  */
83 #define dns_rbtdb_t dns_rbtdb64_t
84 #define rdatasetheader_t rdatasetheader64_t
85 #define rbtdb_version_t rbtdb_version64_t
86 #else
87 typedef isc_uint32_t                    rbtdb_serial_t;
88 #endif
89
90 typedef isc_uint32_t                    rbtdb_rdatatype_t;
91
/*
 * An rbtdb_rdatatype_t packs two 16-bit rdata types into one 32-bit value:
 * the "base" type lives in the low 16 bits and the "ext" type in the high
 * 16 bits.  RBTDB_RDATATYPE_BASE() and RBTDB_RDATATYPE_EXT() extract the
 * two halves; RBTDB_RDATATYPE_VALUE() builds the packed value.
 */
#define RBTDB_RDATATYPE_BASE(type)	((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)	((dns_rdatatype_t)((type) >> 16))
/*
 * Both operands are converted to the unsigned 32-bit rbtdb_rdatatype_t
 * before shifting.  Without the conversion a 16-bit 'e' is promoted to a
 * signed int, and shifting a value >= 0x8000 left by 16 overflows it
 * (undefined behavior) and yields a signed result.
 */
#define RBTDB_RDATATYPE_VALUE(b, e) \
	((((rbtdb_rdatatype_t)(e)) << 16) | ((rbtdb_rdatatype_t)(b)))

/*
 * Frequently used packed types.  For the SIG* values the base type is
 * rrsig and the ext type is the type named in the macro.
 */
#define RBTDB_RDATATYPE_SIGNSEC \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
#define RBTDB_RDATATYPE_SIGNS \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
#define RBTDB_RDATATYPE_SIGCNAME \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
#define RBTDB_RDATATYPE_SIGDNAME \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
#define RBTDB_RDATATYPE_NCACHEANY \
		RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
106
/*
 * The database lock is an rwlock only when ISC_RWLOCK_USEATOMIC is defined.
 * An rwlock helps lookup performance only if it is implemented efficiently;
 * otherwise it costs more memory and its own overhead (when it internally
 * calls mutex locks) can even make lookups slower, so the simple mutex is
 * used instead.
 */
#ifndef ISC_RWLOCK_USEATOMIC
#define DNS_RBTDB_USERWLOCK 0
#else
#define DNS_RBTDB_USERWLOCK 1
#endif

/*
 * Wrappers for the database lock.  The lock type argument 't' is ignored
 * in the mutex variant.
 */
#if !DNS_RBTDB_USERWLOCK
#define RBTDB_INITLOCK(l)	isc_mutex_init(l)
#define RBTDB_DESTROYLOCK(l)	DESTROYLOCK(l)
#define RBTDB_LOCK(l, t)	LOCK(l)
#define RBTDB_UNLOCK(l, t)	UNLOCK(l)
#else
#define RBTDB_INITLOCK(l)	isc_rwlock_init((l), 0, 0)
#define RBTDB_DESTROYLOCK(l)	isc_rwlock_destroy(l)
#define RBTDB_LOCK(l, t)	RWLOCK((l), (t))
#define RBTDB_UNLOCK(l, t)	RWUNLOCK((l), (t))
#endif
132
133 /*
134  * Since node locking is sensitive to both performance and memory footprint,
135  * we need some trick here.  If we have both high-performance rwlock and
136  * high performance and small-memory reference counters, we use rwlock for
137  * node lock and isc_refcount for node references.  In this case, we don't have
138  * to protect the access to the counters by locks.
139  * Otherwise, we simply use ordinary mutex lock for node locking, and use
140  * simple integers as reference counters which is protected by the lock.
141  * In most cases, we can simply use wrapper macros such as NODE_LOCK and
142  * NODE_UNLOCK.  In some other cases, however, we need to protect reference
143  * counters first and then protect other parts of a node as read-only data.
144  * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
145  * provided for these special cases.  When we can use the efficient backend
146  * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
147  * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
148  * section including the access to the reference counter.
149  * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
150  * section is also protected by NODE_STRONGLOCK().
151  */
152 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
153 typedef isc_rwlock_t nodelock_t;
154
155 #define NODE_INITLOCK(l)        isc_rwlock_init((l), 0, 0)
156 #define NODE_DESTROYLOCK(l)     isc_rwlock_destroy(l)
157 #define NODE_LOCK(l, t)         RWLOCK((l), (t))
158 #define NODE_UNLOCK(l, t)       RWUNLOCK((l), (t))
159 #define NODE_TRYUPGRADE(l)      isc_rwlock_tryupgrade(l)
160
161 #define NODE_STRONGLOCK(l)      ((void)0)
162 #define NODE_STRONGUNLOCK(l)    ((void)0)
163 #define NODE_WEAKLOCK(l, t)     NODE_LOCK(l, t)
164 #define NODE_WEAKUNLOCK(l, t)   NODE_UNLOCK(l, t)
165 #define NODE_WEAKDOWNGRADE(l)   isc_rwlock_downgrade(l)
166 #else
167 typedef isc_mutex_t nodelock_t;
168
169 #define NODE_INITLOCK(l)        isc_mutex_init(l)
170 #define NODE_DESTROYLOCK(l)     DESTROYLOCK(l)
171 #define NODE_LOCK(l, t)         LOCK(l)
172 #define NODE_UNLOCK(l, t)       UNLOCK(l)
173 #define NODE_TRYUPGRADE(l)      ISC_R_SUCCESS
174
175 #define NODE_STRONGLOCK(l)      LOCK(l)
176 #define NODE_STRONGUNLOCK(l)    UNLOCK(l)
177 #define NODE_WEAKLOCK(l, t)     ((void)0)
178 #define NODE_WEAKUNLOCK(l, t)   ((void)0)
179 #define NODE_WEAKDOWNGRADE(l)   ((void)0)
180 #endif
181
/*
 * Allow clients with a virtual time of up to 5 minutes (300 seconds) in
 * the past to see records that would otherwise have expired.
 */
#define RBTDB_VIRTUAL 300
187
188 struct noqname {
189         dns_name_t name;
190         void *     nsec;
191         void *     nsecsig;
192 };
193
194 typedef struct acachectl acachectl_t;  
195
196 typedef struct rdatasetheader {
197         /*%
198          * Locked by the owning node's lock.
199          */
200         rbtdb_serial_t                  serial;
201         dns_ttl_t                       ttl;
202         rbtdb_rdatatype_t               type;
203         isc_uint16_t                    attributes;
204         dns_trust_t                     trust;
205         struct noqname                  *noqname;
206         /*%<
207          * We don't use the LIST macros, because the LIST structure has
208          * both head and tail pointers, and is doubly linked.
209          */
210
211         struct rdatasetheader           *next;
212         /*%<
213          * If this is the top header for an rdataset, 'next' points
214          * to the top header for the next rdataset (i.e., the next type).
215          * Otherwise, it points up to the header whose down pointer points
216          * at this header.
217          */
218           
219         struct rdatasetheader           *down;
220         /*%<
221          * Points to the header for the next older version of
222          * this rdataset.
223          */
224
225         isc_uint32_t                    count;
226         /*%<
227          * Monotonously increased every time this rdataset is bound so that
228          * it is used as the base of the starting point in DNS responses
229          * when the "cyclic" rrset-order is required.  Since the ordering
230          * should not be so crucial, no lock is set for the counter for
231          * performance reasons.
232          */
233
234         acachectl_t                     *additional_auth;
235         acachectl_t                     *additional_glue;
236 } rdatasetheader_t;
237
/* Bit flags stored in the 'attributes' member of an rdataset header. */
#define RDATASET_ATTR_NONEXISTENT	0x0001
#define RDATASET_ATTR_STALE		0x0002
#define RDATASET_ATTR_IGNORE		0x0004
#define RDATASET_ATTR_RETAIN		0x0008
#define RDATASET_ATTR_NXDOMAIN		0x0010
243
244 typedef struct acache_cbarg {
245         dns_rdatasetadditional_t        type;
246         unsigned int                    count;
247         dns_db_t                        *db;
248         dns_dbnode_t                    *node;
249         rdatasetheader_t                *header;
250 } acache_cbarg_t;
251
252 struct acachectl {
253         dns_acacheentry_t               *entry;
254         acache_cbarg_t                  *cbarg;
255 };
256
/*
 * XXX
 * If the cache pre-expires data (because memory is low, or for any other
 * reason) before the rdataset's TTL has expired, it MUST respect the
 * RETAIN bit and must not expire the data until its TTL has expired.
 */

#undef IGNORE			/* WIN32 winbase.h defines this. */

/* Predicates testing individual RDATASET_ATTR_* bits of a header. */
#define EXISTS(header) \
	(((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
#define NONEXISTENT(header) \
	(((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
#define IGNORE(header) \
	(((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
#define RETAIN(header) \
	(((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
#define NXDOMAIN(header) \
	(((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)

/* Default sizes of the node lock table. */
#define DEFAULT_NODE_LOCK_COUNT		7	/*%< Should be prime. */
#define DEFAULT_CACHE_NODE_LOCK_COUNT	1009	/*%< Should be prime. */
280
281 typedef struct {
282         nodelock_t                      lock;
283         /* Protected in the refcount routines. */
284         isc_refcount_t                  references;
285         /* Locked by lock. */
286         isc_boolean_t                   exiting;
287 } rbtdb_nodelock_t;
288
289 typedef struct rbtdb_changed {
290         dns_rbtnode_t *                 node;
291         isc_boolean_t                   dirty;
292         ISC_LINK(struct rbtdb_changed)  link;
293 } rbtdb_changed_t;
294
295 typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
296
297 typedef struct rbtdb_version {
298         /* Not locked */
299         rbtdb_serial_t                  serial;
300         /*
301          * Protected in the refcount routines.
302          * XXXJT: should we change the lock policy based on the refcount
303          * performance?
304          */
305         isc_refcount_t                  references;
306         /* Locked by database lock. */
307         isc_boolean_t                   writer;
308         isc_boolean_t                   commit_ok;
309         rbtdb_changedlist_t             changed_list;
310         ISC_LINK(struct rbtdb_version)  link;
311 } rbtdb_version_t;
312
313 typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;
314
315 typedef struct {
316         /* Unlocked. */
317         dns_db_t                        common;
318 #if DNS_RBTDB_USERWLOCK
319         isc_rwlock_t                    lock;
320 #else
321         isc_mutex_t                     lock;
322 #endif
323         isc_rwlock_t                    tree_lock;
324         unsigned int                    node_lock_count;
325         rbtdb_nodelock_t *              node_locks;
326         dns_rbtnode_t *                 origin_node;
327         /* Locked by lock. */
328         unsigned int                    active;
329         isc_refcount_t                  references;
330         unsigned int                    attributes;
331         rbtdb_serial_t                  current_serial;
332         rbtdb_serial_t                  least_serial;
333         rbtdb_serial_t                  next_serial;
334         rbtdb_version_t *               current_version;
335         rbtdb_version_t *               future_version;
336         rbtdb_versionlist_t             open_versions;
337         isc_boolean_t                   overmem;
338         isc_task_t *                    task;
339         dns_dbnode_t                    *soanode;
340         dns_dbnode_t                    *nsnode;
341         /* Locked by tree_lock. */
342         dns_rbt_t *                     tree;
343         isc_boolean_t                   secure;
344
345         /* Unlocked */
346         unsigned int                    quantum;
347 } dns_rbtdb_t;
348
349 #define RBTDB_ATTR_LOADED               0x01
350 #define RBTDB_ATTR_LOADING              0x02
351
352 /*%
353  * Search Context
354  */
355 typedef struct {
356         dns_rbtdb_t *           rbtdb;
357         rbtdb_version_t *       rbtversion;
358         rbtdb_serial_t          serial;
359         unsigned int            options;
360         dns_rbtnodechain_t      chain;
361         isc_boolean_t           copy_name;
362         isc_boolean_t           need_cleanup;
363         isc_boolean_t           wild;
364         dns_rbtnode_t *         zonecut;
365         rdatasetheader_t *      zonecut_rdataset;
366         rdatasetheader_t *      zonecut_sigrdataset;
367         dns_fixedname_t         zonecut_name;
368         isc_stdtime_t           now;
369 } rbtdb_search_t;
370
371 /*%
372  * Load Context
373  */
374 typedef struct {
375         dns_rbtdb_t *           rbtdb;
376         isc_stdtime_t           now;
377 } rbtdb_load_t;
378
379 static void rdataset_disassociate(dns_rdataset_t *rdataset);
380 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
381 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
382 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
383 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
384 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
385 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
386                                         dns_name_t *name,
387                                         dns_rdataset_t *nsec,
388                                         dns_rdataset_t *nsecsig);
389 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
390                                            dns_rdatasetadditional_t type,
391                                            dns_rdatatype_t qtype,
392                                            dns_acache_t *acache,
393                                            dns_zone_t **zonep,
394                                            dns_db_t **dbp,
395                                            dns_dbversion_t **versionp,
396                                            dns_dbnode_t **nodep,
397                                            dns_name_t *fname,
398                                            dns_message_t *msg,
399                                            isc_stdtime_t now);
400 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
401                                            dns_rdatasetadditional_t type,
402                                            dns_rdatatype_t qtype,
403                                            dns_acache_t *acache,
404                                            dns_zone_t *zone,
405                                            dns_db_t *db,
406                                            dns_dbversion_t *version,
407                                            dns_dbnode_t *node,
408                                            dns_name_t *fname);
409 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
410                                            dns_rdataset_t *rdataset,
411                                            dns_rdatasetadditional_t type,
412                                            dns_rdatatype_t qtype);
413
414 static dns_rdatasetmethods_t rdataset_methods = {
415         rdataset_disassociate,
416         rdataset_first,
417         rdataset_next,
418         rdataset_current,
419         rdataset_clone,
420         rdataset_count,
421         NULL,
422         rdataset_getnoqname,
423         rdataset_getadditional,
424         rdataset_setadditional,
425         rdataset_putadditional
426 };
427
428 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
429 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
430 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
431 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
432                                  dns_rdataset_t *rdataset);
433
434 static dns_rdatasetitermethods_t rdatasetiter_methods = {
435         rdatasetiter_destroy,
436         rdatasetiter_first,
437         rdatasetiter_next,
438         rdatasetiter_current
439 };
440
441 typedef struct rbtdb_rdatasetiter {
442         dns_rdatasetiter_t              common;
443         rdatasetheader_t *              current;
444 } rbtdb_rdatasetiter_t;
445
446 static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
447 static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
448 static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
449 static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
450                                         dns_name_t *name);
451 static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
452 static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
453 static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
454                                            dns_dbnode_t **nodep,
455                                            dns_name_t *name);
456 static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
457 static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
458                                           dns_name_t *name);
459
460 static dns_dbiteratormethods_t dbiterator_methods = {
461         dbiterator_destroy,
462         dbiterator_first,
463         dbiterator_last,
464         dbiterator_seek,
465         dbiterator_prev,
466         dbiterator_next,
467         dbiterator_current,
468         dbiterator_pause,
469         dbiterator_origin
470 };
471
472 #define DELETION_BATCH_MAX 64
473
474 /*
475  * If 'paused' is ISC_TRUE, then the tree lock is not being held.
476  */
477 typedef struct rbtdb_dbiterator {
478         dns_dbiterator_t                common;
479         isc_boolean_t                   paused;
480         isc_boolean_t                   new_origin;
481         isc_rwlocktype_t                tree_locked;
482         isc_result_t                    result;
483         dns_fixedname_t                 name;
484         dns_fixedname_t                 origin;
485         dns_rbtnodechain_t              chain;
486         dns_rbtnode_t                   *node;
487         dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX];
488         int                             delete;
489 } rbtdb_dbiterator_t;
490
491
492 #define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
493 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
494
495 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
496                        isc_event_t *event);
497
498 /*
499  * Locking
500  *
501  * If a routine is going to lock more than one lock in this module, then
502  * the locking must be done in the following order:
503  *
504  *      Tree Lock
505  *
506  *      Node Lock       (Only one from the set may be locked at one time by
507  *                       any caller)
508  *
509  *      Database Lock
510  *
511  * Failure to follow this hierarchy can result in deadlock.
512  */
513
514 /*
515  * Deleting Nodes
516  *
517  * Currently there is no deletion of nodes from the database, except when
518  * the database is being destroyed.
519  *
520  * If node deletion is added in the future, then for zone databases the node
521  * for the origin of the zone MUST NOT be deleted.
522  */
523
524
525 /*
526  * DB Routines
527  */
528
529 static void
530 attach(dns_db_t *source, dns_db_t **targetp) {
531         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
532
533         REQUIRE(VALID_RBTDB(rbtdb));
534
535         isc_refcount_increment(&rbtdb->references, NULL);
536
537         *targetp = source;
538 }
539
540 static void
541 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
542         dns_rbtdb_t *rbtdb = event->ev_arg;
543
544         UNUSED(task);
545
546         free_rbtdb(rbtdb, ISC_TRUE, event);
547 }
548
549 /*%
550  * Work out how many nodes can be deleted in the time between two
551  * requests to the nameserver.  Smooth the resulting number and use it
552  * as a estimate for the number of nodes to be deleted in the next
553  * iteration.
554  */
555 static unsigned int
556 adjust_quantum(unsigned int old, isc_time_t *start) {
557         unsigned int pps = dns_pps;     /* packets per second */
558         unsigned int interval;
559         isc_uint64_t usecs;
560         isc_time_t end;
561         unsigned int new;
562
563         if (pps < 100)
564                 pps = 100;
565         isc_time_now(&end);
566
567         interval = 1000000 / pps;       /* interval in usec */
568         if (interval == 0)
569                 interval = 1;
570         usecs = isc_time_microdiff(&end, start);
571         if (usecs == 0) {
572                 /*
573                  * We were unable to measure the amount of time taken.
574                  * Double the nodes deleted next time.
575                  */
576                 old *= 2;
577                 if (old > 1000)
578                         old = 1000;
579                 return (old);
580         }
581         new = old * interval;
582         new /= (unsigned int)usecs;
583         if (new == 0)
584                 new = 1;
585         else if (new > 1000)
586                 new = 1000;
587
588         /* Smooth */
589         new = (new + old * 3) / 4;
590         
591         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
592                       ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
593
594         return (new);
595 }
596                 
597 static void
598 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
599         unsigned int i;
600         isc_ondestroy_t ondest;
601         isc_result_t result;
602         char buf[DNS_NAME_FORMATSIZE];
603         isc_time_t start;
604
605         REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
606         REQUIRE(rbtdb->future_version == NULL);
607
608         if (rbtdb->current_version != NULL) {
609                 unsigned int refs;
610
611                 isc_refcount_decrement(&rbtdb->current_version->references,
612                                        &refs);
613                 INSIST(refs == 0);
614                 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
615                 isc_refcount_destroy(&rbtdb->current_version->references);
616                 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
617                             sizeof(rbtdb_version_t));
618         }
619         if (event == NULL)
620                 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
621  again:
622         if (rbtdb->tree != NULL) {
623                 isc_time_now(&start);
624                 result = dns_rbt_destroy2(&rbtdb->tree, rbtdb->quantum);
625                 if (result == ISC_R_QUOTA) {
626                         INSIST(rbtdb->task != NULL);
627                         if (rbtdb->quantum != 0)
628                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
629                                                                 &start);
630                         if (event == NULL)
631                                 event = isc_event_allocate(rbtdb->common.mctx,
632                                                            NULL,
633                                                          DNS_EVENT_FREESTORAGE,
634                                                            free_rbtdb_callback,
635                                                            rbtdb,
636                                                            sizeof(isc_event_t));
637                         if (event == NULL)
638                                 goto again;
639                         isc_task_send(rbtdb->task, &event);
640                         return;
641                 }
642                 INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
643         }
644         if (event != NULL)
645                 isc_event_free(&event);
646         if (log) {
647                 if (dns_name_dynamic(&rbtdb->common.origin))
648                         dns_name_format(&rbtdb->common.origin, buf,
649                                         sizeof(buf));
650                 else
651                         strcpy(buf, "<UNKNOWN>");
652                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
653                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
654                               "done free_rbtdb(%s)", buf);
655         }
656         if (dns_name_dynamic(&rbtdb->common.origin))
657                 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
658         for (i = 0; i < rbtdb->node_lock_count; i++) {
659                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
660                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
661         }
662         isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
663                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
664         isc_rwlock_destroy(&rbtdb->tree_lock);
665         isc_refcount_destroy(&rbtdb->references);
666         if (rbtdb->task != NULL)
667                 isc_task_detach(&rbtdb->task);
668         RBTDB_DESTROYLOCK(&rbtdb->lock);
669         rbtdb->common.magic = 0;
670         rbtdb->common.impmagic = 0;
671         ondest = rbtdb->common.ondest;
672         isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
673         isc_ondestroy_notify(&ondest, rbtdb);
674 }
675
676 static inline void
677 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
678         isc_boolean_t want_free = ISC_FALSE;
679         unsigned int i;
680         unsigned int inactive = 0;
681
682         /* XXX check for open versions here */
683
684         if (rbtdb->soanode != NULL)
685                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
686         if (rbtdb->nsnode != NULL)
687                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
688
689         /*
690          * Even though there are no external direct references, there still
691          * may be nodes in use.
692          */
693         for (i = 0; i < rbtdb->node_lock_count; i++) {
694                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
695                 rbtdb->node_locks[i].exiting = ISC_TRUE;
696                 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
697                 if (isc_refcount_current(&rbtdb->node_locks[i].references)
698                     == 0) {
699                         inactive++;
700                 }
701         }
702
703         if (inactive != 0) {
704                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
705                 rbtdb->active -= inactive;
706                 if (rbtdb->active == 0)
707                         want_free = ISC_TRUE;
708                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
709                 if (want_free) {
710                         char buf[DNS_NAME_FORMATSIZE];
711                         if (dns_name_dynamic(&rbtdb->common.origin))
712                                 dns_name_format(&rbtdb->common.origin, buf,
713                                                 sizeof(buf));
714                         else
715                                 strcpy(buf, "<UNKNOWN>");
716                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
717                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
718                                       "calling free_rbtdb(%s)", buf);
719                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
720                 }
721         }
722 }
723
724 static void
725 detach(dns_db_t **dbp) {
726         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
727         unsigned int refs;
728
729         REQUIRE(VALID_RBTDB(rbtdb));
730
731         isc_refcount_decrement(&rbtdb->references, &refs);
732
733         if (refs == 0)
734                 maybe_free_rbtdb(rbtdb);
735
736         *dbp = NULL;
737 }
738
739 static void
740 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
741         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
742         rbtdb_version_t *version;
743         unsigned int refs;
744
745         REQUIRE(VALID_RBTDB(rbtdb));
746
747         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
748         version = rbtdb->current_version;
749         isc_refcount_increment(&version->references, &refs);
750         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
751
752         *versionp = (dns_dbversion_t *)version;
753 }
754
755 static inline rbtdb_version_t *
756 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
757                  unsigned int references, isc_boolean_t writer)
758 {
759         isc_result_t result;
760         rbtdb_version_t *version;
761
762         version = isc_mem_get(mctx, sizeof(*version));
763         if (version == NULL)
764                 return (NULL);
765         version->serial = serial;
766         result = isc_refcount_init(&version->references, references);
767         if (result != ISC_R_SUCCESS) {
768                 isc_mem_put(mctx, version, sizeof(*version));
769                 return (NULL);
770         }
771         version->writer = writer;
772         version->commit_ok = ISC_FALSE;
773         ISC_LIST_INIT(version->changed_list);
774         ISC_LINK_INIT(version, link);
775
776         return (version);
777 }
778
779 static isc_result_t
780 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
781         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
782         rbtdb_version_t *version;
783
784         REQUIRE(VALID_RBTDB(rbtdb));
785         REQUIRE(versionp != NULL && *versionp == NULL);
786         REQUIRE(rbtdb->future_version == NULL);
787
788         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
789         RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
790         version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
791                                    ISC_TRUE);
792         if (version != NULL) {
793                 version->commit_ok = ISC_TRUE;
794                 rbtdb->next_serial++;
795                 rbtdb->future_version = version;
796         }
797         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
798
799         if (version == NULL)
800                 return (ISC_R_NOMEMORY);
801
802         *versionp = version;
803
804         return (ISC_R_SUCCESS);
805 }
806
807 static void
808 attachversion(dns_db_t *db, dns_dbversion_t *source,
809               dns_dbversion_t **targetp)
810 {
811         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
812         rbtdb_version_t *rbtversion = source;
813         unsigned int refs;
814
815         REQUIRE(VALID_RBTDB(rbtdb));
816
817         isc_refcount_increment(&rbtversion->references, &refs);
818         INSIST(refs > 1);
819
820         *targetp = rbtversion;
821 }
822
823 static rbtdb_changed_t *
824 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
825             dns_rbtnode_t *node)
826 {
827         rbtdb_changed_t *changed;
828         unsigned int refs;
829
830         /*
831          * Caller must be holding the node lock if its reference must be
832          * protected by the lock.
833          */
834
835         changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
836
837         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
838
839         REQUIRE(version->writer);
840
841         if (changed != NULL) {
842                 dns_rbtnode_refincrement(node, &refs);
843                 INSIST(refs != 0);
844                 changed->node = node;
845                 changed->dirty = ISC_FALSE;
846                 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
847         } else
848                 version->commit_ok = ISC_FALSE;
849
850         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
851
852         return (changed);
853 }
854
855 static void
856 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
857                  acachectl_t *array)
858 {
859         unsigned int count;
860         unsigned int i;
861         unsigned char *raw;     /* RDATASLAB */
862
863         /*
864          * The caller must be holding the corresponding node lock.
865          */
866
867         if (array == NULL)
868                 return;
869
870         raw = (unsigned char *)header + sizeof(*header);
871         count = raw[0] * 256 + raw[1];
872
873         /*
874          * Sanity check: since an additional cache entry has a reference to
875          * the original DB node (in the callback arg), there should be no
876          * acache entries when the node can be freed. 
877          */
878         for (i = 0; i < count; i++)
879                 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
880
881         isc_mem_put(mctx, array, count * sizeof(acachectl_t));
882 }
883
884 static inline void
885 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
886
887         if (dns_name_dynamic(&(*noqname)->name))
888                 dns_name_free(&(*noqname)->name, mctx);
889         if ((*noqname)->nsec != NULL)
890                 isc_mem_put(mctx, (*noqname)->nsec,
891                             dns_rdataslab_size((*noqname)->nsec, 0));
892         if ((*noqname)->nsec != NULL)
893                 isc_mem_put(mctx, (*noqname)->nsecsig,
894                             dns_rdataslab_size((*noqname)->nsecsig, 0));
895         isc_mem_put(mctx, *noqname, sizeof(**noqname));
896         *noqname = NULL;
897 }
898
899 static inline void
900 free_rdataset(isc_mem_t *mctx, rdatasetheader_t *rdataset) {
901         unsigned int size;
902
903         if (rdataset->noqname != NULL)
904                 free_noqname(mctx, &rdataset->noqname);
905
906         free_acachearray(mctx, rdataset, rdataset->additional_auth);
907         free_acachearray(mctx, rdataset, rdataset->additional_glue);
908
909         if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
910                 size = sizeof(*rdataset);
911         else
912                 size = dns_rdataslab_size((unsigned char *)rdataset,
913                                           sizeof(*rdataset));
914         isc_mem_put(mctx, rdataset, size);
915 }
916
917 static inline void
918 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
919         rdatasetheader_t *header, *dcurrent;
920         isc_boolean_t make_dirty = ISC_FALSE;
921
922         /*
923          * Caller must hold the node lock.
924          */
925
926         /*
927          * We set the IGNORE attribute on rdatasets with serial number
928          * 'serial'.  When the reference count goes to zero, these rdatasets
929          * will be cleaned up; until that time, they will be ignored.
930          */
931         for (header = node->data; header != NULL; header = header->next) {
932                 if (header->serial == serial) {
933                         header->attributes |= RDATASET_ATTR_IGNORE;
934                         make_dirty = ISC_TRUE;
935                 }
936                 for (dcurrent = header->down;
937                      dcurrent != NULL;
938                      dcurrent = dcurrent->down) {
939                         if (dcurrent->serial == serial) {
940                                 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
941                                 make_dirty = ISC_TRUE;
942                         }
943                 }
944         }
945         if (make_dirty)
946                 node->dirty = 1;
947 }
948
949 static inline void
950 clean_stale_headers(isc_mem_t *mctx, rdatasetheader_t *top) {
951         rdatasetheader_t *d, *down_next;
952
953         for (d = top->down; d != NULL; d = down_next) {
954                 down_next = d->down;
955                 free_rdataset(mctx, d);
956         }
957         top->down = NULL;
958 }
959
960 static inline void
961 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
962         rdatasetheader_t *current, *top_prev, *top_next;
963         isc_mem_t *mctx = rbtdb->common.mctx;
964
965         /*
966          * Caller must be holding the node lock.
967          */
968
969         top_prev = NULL;
970         for (current = node->data; current != NULL; current = top_next) {
971                 top_next = current->next;
972                 clean_stale_headers(mctx, current);
973                 /*
974                  * If current is nonexistent or stale, we can clean it up.
975                  */
976                 if ((current->attributes &
977                      (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
978                         if (top_prev != NULL)
979                                 top_prev->next = current->next;
980                         else
981                                 node->data = current->next;
982                         free_rdataset(mctx, current);
983                 } else
984                         top_prev = current;
985         }
986         node->dirty = 0;
987 }
988
989 static inline void
990 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
991                 rbtdb_serial_t least_serial)
992 {
993         rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
994         rdatasetheader_t *top_prev, *top_next;
995         isc_mem_t *mctx = rbtdb->common.mctx;
996         isc_boolean_t still_dirty = ISC_FALSE;
997
998         /*
999          * Caller must be holding the node lock.
1000          */
1001         REQUIRE(least_serial != 0);
1002
1003         top_prev = NULL;
1004         for (current = node->data; current != NULL; current = top_next) {
1005                 top_next = current->next;
1006
1007                 /*
1008                  * First, we clean up any instances of multiple rdatasets
1009                  * with the same serial number, or that have the IGNORE
1010                  * attribute.
1011                  */
1012                 dparent = current;
1013                 for (dcurrent = current->down;
1014                      dcurrent != NULL;
1015                      dcurrent = down_next) {
1016                         down_next = dcurrent->down;
1017                         INSIST(dcurrent->serial <= dparent->serial);
1018                         if (dcurrent->serial == dparent->serial ||
1019                             IGNORE(dcurrent)) {
1020                                 if (down_next != NULL)
1021                                         down_next->next = dparent;
1022                                 dparent->down = down_next;
1023                                 free_rdataset(mctx, dcurrent);
1024                         } else
1025                                 dparent = dcurrent;
1026                 }
1027
1028                 /*
1029                  * We've now eliminated all IGNORE datasets with the possible
1030                  * exception of current, which we now check.
1031                  */
1032                 if (IGNORE(current)) {
1033                         down_next = current->down;
1034                         if (down_next == NULL) {
1035                                 if (top_prev != NULL)
1036                                         top_prev->next = current->next;
1037                                 else
1038                                         node->data = current->next;
1039                                 free_rdataset(mctx, current);
1040                                 /*
1041                                  * current no longer exists, so we can
1042                                  * just continue with the loop.
1043                                  */
1044                                 continue;
1045                         } else {
1046                                 /*
1047                                  * Pull up current->down, making it the new
1048                                  * current.
1049                                  */
1050                                 if (top_prev != NULL)
1051                                         top_prev->next = down_next;
1052                                 else
1053                                         node->data = down_next;
1054                                 down_next->next = top_next;
1055                                 free_rdataset(mctx, current);
1056                                 current = down_next;
1057                         }
1058                 }
1059
1060                 /*
1061                  * We now try to find the first down node less than the
1062                  * least serial.
1063                  */
1064                 dparent = current;
1065                 for (dcurrent = current->down;
1066                      dcurrent != NULL;
1067                      dcurrent = down_next) {
1068                         down_next = dcurrent->down;
1069                         if (dcurrent->serial < least_serial)
1070                                 break;
1071                         dparent = dcurrent;
1072                 }
1073
1074                 /*
1075                  * If there is a such an rdataset, delete it and any older
1076                  * versions.
1077                  */
1078                 if (dcurrent != NULL) {
1079                         do {
1080                                 down_next = dcurrent->down;
1081                                 INSIST(dcurrent->serial <= least_serial);
1082                                 free_rdataset(mctx, dcurrent);
1083                                 dcurrent = down_next;
1084                         } while (dcurrent != NULL);
1085                         dparent->down = NULL;
1086                 }
1087
1088                 /*
1089                  * Note.  The serial number of 'current' might be less than
1090                  * least_serial too, but we cannot delete it because it is
1091                  * the most recent version, unless it is a NONEXISTENT
1092                  * rdataset.
1093                  */
1094                 if (current->down != NULL) {
1095                         still_dirty = ISC_TRUE;
1096                         top_prev = current;
1097                 } else {
1098                         /*
1099                          * If this is a NONEXISTENT rdataset, we can delete it.
1100                          */
1101                         if (NONEXISTENT(current)) {
1102                                 if (top_prev != NULL)
1103                                         top_prev->next = current->next;
1104                                 else
1105                                         node->data = current->next;
1106                                 free_rdataset(mctx, current);
1107                         } else
1108                                 top_prev = current;
1109                 }
1110         }
1111         if (!still_dirty)
1112                 node->dirty = 0;
1113 }
1114
1115 /*
1116  * Caller must be holding the node lock if its reference must be protected
1117  * by the lock.
1118  */
1119 static inline void
1120 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1121         unsigned int lockrefs, noderefs;
1122         isc_refcount_t *lockref;
1123
1124         dns_rbtnode_refincrement0(node, &noderefs);
1125         if (noderefs == 1) {    /* this is the first reference to the node */
1126                 lockref = &rbtdb->node_locks[node->locknum].references;
1127                 isc_refcount_increment0(lockref, &lockrefs);
1128                 INSIST(lockrefs != 0);
1129         }
1130         INSIST(noderefs != 0);
1131 }
1132
1133 /*
1134  * Caller must be holding the node lock; either the "strong", read or write
1135  * lock.  Note that the lock must be held even when node references are
1136  * atomically modified; in that case the decrement operation itself does not
1137  * have to be protected, but we must avoid a race condition where multiple
1138  * threads are decreasing the reference to zero simultaneously and at least
1139  * one of them is going to free the node.
1140  * This function returns ISC_TRUE if and only if the node reference decreases
1141  * to zero.
1142  */
1143 static isc_boolean_t
1144 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1145                     rbtdb_serial_t least_serial,
1146                     isc_rwlocktype_t nlock, isc_rwlocktype_t tlock)
1147 {
1148         isc_result_t result;
1149         isc_boolean_t write_locked;
1150         rbtdb_nodelock_t *nodelock;
1151         unsigned int refs, nrefs;
1152
1153         nodelock = &rbtdb->node_locks[node->locknum];
1154
1155         /* Handle easy and typical case first. */
1156         if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1157                 dns_rbtnode_refdecrement(node, &nrefs);
1158                 INSIST((int)nrefs >= 0);
1159                 if (nrefs == 0) {
1160                         isc_refcount_decrement(&nodelock->references, &refs);
1161                         INSIST((int)refs >= 0);
1162                 }
1163                 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1164         }
1165
1166         /* Upgrade the lock? */
1167         if (nlock == isc_rwlocktype_read) {
1168                 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1169                 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1170         }
1171         dns_rbtnode_refdecrement(node, &nrefs);
1172         INSIST((int)nrefs >= 0);
1173         if (nrefs > 0) {
1174                 /* Restore the lock? */
1175                 if (nlock == isc_rwlocktype_read)
1176                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1177                 return (ISC_FALSE);
1178         }
1179
1180         if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1181                 if (IS_CACHE(rbtdb))
1182                         clean_cache_node(rbtdb, node);
1183                 else {
1184                         if (least_serial == 0) {
1185                                 /*
1186                                  * Caller doesn't know the least serial.
1187                                  * Get it.
1188                                  */
1189                                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1190                                 least_serial = rbtdb->least_serial;
1191                                 RBTDB_UNLOCK(&rbtdb->lock,
1192                                              isc_rwlocktype_read);
1193                         }
1194                         clean_zone_node(rbtdb, node, least_serial);
1195                 }
1196         }
1197
1198         isc_refcount_decrement(&nodelock->references, &refs);
1199         INSIST((int)refs >= 0);
1200
1201         /*
1202          * XXXDCL should this only be done for cache zones?
1203          */
1204         if (node->data != NULL || node->down != NULL) {
1205                 /* Restore the lock? */
1206                 if (nlock == isc_rwlocktype_read)
1207                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1208                 return (ISC_TRUE);
1209         }
1210
1211         /*
1212          * XXXDCL need to add a deferred delete method for ISC_R_LOCKBUSY.
1213          */
1214         if (tlock != isc_rwlocktype_write) {
1215                 /*
1216                  * Locking hierarchy notwithstanding, we don't need to free
1217                  * the node lock before acquiring the tree write lock because
1218                  * we only do a trylock.
1219                  */
1220                 if (tlock == isc_rwlocktype_read)
1221                         result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1222                 else
1223                         result = isc_rwlock_trylock(&rbtdb->tree_lock,
1224                                                     isc_rwlocktype_write);
1225                 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1226                               result == ISC_R_LOCKBUSY);
1227  
1228                 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1229         } else
1230                 write_locked = ISC_TRUE;
1231
1232         if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1233                 /*
1234                  * We can now delete the node if the reference counter is
1235                  * zero.  This should be typically the case, but a different
1236                  * thread may still gain a (new) reference just before the
1237                  * current thread locks the tree (e.g., in findnode()).
1238                  */
1239
1240                 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1241                         char printname[DNS_NAME_FORMATSIZE];
1242
1243                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1244                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1245                                       "decrement_reference: "
1246                                       "delete from rbt: %p %s",
1247                                       node,
1248                                       dns_rbt_formatnodename(node, printname,
1249                                                            sizeof(printname)));
1250                 }
1251
1252                 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1253                 if (result != ISC_R_SUCCESS)
1254                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1255                                       DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1256                                       "decrement_reference: "
1257                                       "dns_rbt_deletenode: %s",
1258                                       isc_result_totext(result));
1259         }
1260
1261         /* Restore the lock? */
1262         if (nlock == isc_rwlocktype_read)
1263                 NODE_WEAKDOWNGRADE(&nodelock->lock);
1264
1265         /*
1266          * Relock a read lock, or unlock the write lock if no lock was held.
1267          */
1268         if (tlock == isc_rwlocktype_none)
1269                 if (write_locked)
1270                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1271
1272         if (tlock == isc_rwlocktype_read)
1273                 if (write_locked)
1274                         isc_rwlock_downgrade(&rbtdb->tree_lock);
1275
1276         return (ISC_TRUE);
1277 }
1278
1279 static inline void
1280 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1281                    rbtdb_changedlist_t *cleanup_list)
1282 {
1283         /*
1284          * Caller must be holding the database lock.
1285          */
1286
1287         rbtdb->least_serial = version->serial;
1288         *cleanup_list = version->changed_list;
1289         ISC_LIST_INIT(version->changed_list);
1290 }
1291
1292 static inline void
1293 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1294         rbtdb_changed_t *changed, *next_changed;
1295
1296         /*
1297          * If the changed record is dirty, then
1298          * an update created multiple versions of
1299          * a given rdataset.  We keep this list
1300          * until we're the least open version, at
1301          * which point it's safe to get rid of any
1302          * older versions.
1303          *
1304          * If the changed record isn't dirty, then
1305          * we don't need it anymore since we're
1306          * committing and not rolling back.
1307          *
1308          * The caller must be holding the database lock.
1309          */
1310         for (changed = HEAD(version->changed_list);
1311              changed != NULL;
1312              changed = next_changed) {
1313                 next_changed = NEXT(changed, link);
1314                 if (!changed->dirty) {
1315                         UNLINK(version->changed_list,
1316                                changed, link);
1317                         APPEND(*cleanup_list,
1318                                changed, link);
1319                 }
1320         }
1321 }
1322
1323 static isc_boolean_t
1324 iszonesecure(dns_db_t *db, dns_dbnode_t *origin) {
1325         dns_rdataset_t keyset;
1326         dns_rdataset_t nsecset, signsecset;
1327         isc_boolean_t haszonekey = ISC_FALSE;
1328         isc_boolean_t hasnsec = ISC_FALSE;
1329         isc_result_t result;
1330
1331         dns_rdataset_init(&keyset);
1332         result = dns_db_findrdataset(db, origin, NULL, dns_rdatatype_dnskey, 0,
1333                                      0, &keyset, NULL);
1334         if (result == ISC_R_SUCCESS) {
1335                 dns_rdata_t keyrdata = DNS_RDATA_INIT;
1336                 result = dns_rdataset_first(&keyset);
1337                 while (result == ISC_R_SUCCESS) {
1338                         dns_rdataset_current(&keyset, &keyrdata);
1339                         if (dns_zonekey_iszonekey(&keyrdata)) {
1340                                 haszonekey = ISC_TRUE;
1341                                 break;
1342                         }
1343                         result = dns_rdataset_next(&keyset);
1344                 }
1345                 dns_rdataset_disassociate(&keyset);
1346         }
1347         if (!haszonekey)
1348                 return (ISC_FALSE);
1349
1350         dns_rdataset_init(&nsecset);
1351         dns_rdataset_init(&signsecset);
1352         result = dns_db_findrdataset(db, origin, NULL, dns_rdatatype_nsec, 0,
1353                                      0, &nsecset, &signsecset);
1354         if (result == ISC_R_SUCCESS) {
1355                 if (dns_rdataset_isassociated(&signsecset)) {
1356                         hasnsec = ISC_TRUE;
1357                         dns_rdataset_disassociate(&signsecset);
1358                 }
1359                 dns_rdataset_disassociate(&nsecset);
1360         }
1361         return (hasnsec);
1362 }
1363
1364 static void
1365 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
1366         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1367         rbtdb_version_t *version, *cleanup_version, *least_greater;
1368         isc_boolean_t rollback = ISC_FALSE;
1369         rbtdb_changedlist_t cleanup_list;
1370         rbtdb_changed_t *changed, *next_changed;
1371         rbtdb_serial_t serial, least_serial;
1372         dns_rbtnode_t *rbtnode;
1373         unsigned int refs;
1374
1375         REQUIRE(VALID_RBTDB(rbtdb));
1376         version = (rbtdb_version_t *)*versionp;
1377
1378         cleanup_version = NULL;
1379         ISC_LIST_INIT(cleanup_list);
1380
1381         isc_refcount_decrement(&version->references, &refs);
1382         if (refs > 0) {         /* typical and easy case first */
1383                 if (commit) {
1384                         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1385                         INSIST(!version->writer);
1386                         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1387                 }
1388                 goto end;
1389         }
1390
1391         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1392         serial = version->serial;
1393         if (version->writer) {
1394                 if (commit) {
1395                         unsigned cur_ref;
1396                         rbtdb_version_t *cur_version;
1397
1398                         INSIST(version->commit_ok);
1399                         INSIST(version == rbtdb->future_version);
1400                         /*
1401                          * The current version is going to be replaced.
1402                          * Release the (likely last) reference to it from the
1403                          * DB itself and unlink it from the open list.
1404                          */
1405                         cur_version = rbtdb->current_version;
1406                         isc_refcount_decrement(&cur_version->references,
1407                                                &cur_ref);
1408                         if (cur_ref == 0) {
1409                                 if (cur_version->serial == rbtdb->least_serial)
1410                                         INSIST(EMPTY(cur_version->changed_list));
1411                                 UNLINK(rbtdb->open_versions,
1412                                        cur_version, link);
1413                         }
1414                         if (EMPTY(rbtdb->open_versions)) {
1415                                 /*
1416                                  * We're going to become the least open
1417                                  * version.
1418                                  */
1419                                 make_least_version(rbtdb, version,
1420                                                    &cleanup_list);
1421                         } else {
1422                                 /*
1423                                  * Some other open version is the
1424                                  * least version.  We can't cleanup
1425                                  * records that were changed in this
1426                                  * version because the older versions
1427                                  * may still be in use by an open
1428                                  * version.
1429                                  *
1430                                  * We can, however, discard the
1431                                  * changed records for things that
1432                                  * we've added that didn't exist in
1433                                  * prior versions.
1434                                  */
1435                                 cleanup_nondirty(version, &cleanup_list);
1436                         }
1437                         /*
1438                          * If the (soon to be former) current version
1439                          * isn't being used by anyone, we can clean
1440                          * it up.
1441                          */
1442                         if (cur_ref == 0) {
1443                                 cleanup_version = cur_version;
1444                                 APPENDLIST(version->changed_list,
1445                                            cleanup_version->changed_list,
1446                                            link);
1447                         }
1448                         /*
1449                          * Become the current version.
1450                          */
1451                         version->writer = ISC_FALSE;
1452                         rbtdb->current_version = version;
1453                         rbtdb->current_serial = version->serial;
1454                         rbtdb->future_version = NULL;
1455
1456                         /*
1457                          * Keep the current version in the open list, and
1458                          * gain a reference for the DB itself (see the DB
1459                          * creation function below).  This must be the only
1460                          * case where we need to increment the counter from
1461                          * zero and need to use isc_refcount_increment0().
1462                          */
1463                         isc_refcount_increment0(&version->references,
1464                                                 &cur_ref);
1465                         INSIST(cur_ref == 1);
1466                         PREPEND(rbtdb->open_versions,
1467                                 rbtdb->current_version, link);
1468                 } else {
1469                         /*
1470                          * We're rolling back this transaction.
1471                          */
1472                         cleanup_list = version->changed_list;
1473                         ISC_LIST_INIT(version->changed_list);
1474                         rollback = ISC_TRUE;
1475                         cleanup_version = version;
1476                         rbtdb->future_version = NULL;
1477                 }
1478         } else {
1479                 if (version != rbtdb->current_version) {
1480                         /*
1481                          * There are no external or internal references
1482                          * to this version and it can be cleaned up.
1483                          */
1484                         cleanup_version = version;
1485
1486                         /*
1487                          * Find the version with the least serial
1488                          * number greater than ours.
1489                          */
1490                         least_greater = PREV(version, link);
1491                         if (least_greater == NULL)
1492                                 least_greater = rbtdb->current_version;
1493
1494                         INSIST(version->serial < least_greater->serial);
1495                         /*
1496                          * Is this the least open version?
1497                          */
1498                         if (version->serial == rbtdb->least_serial) {
1499                                 /*
1500                                  * Yes.  Install the new least open
1501                                  * version.
1502                                  */
1503                                 make_least_version(rbtdb,
1504                                                    least_greater,
1505                                                    &cleanup_list);
1506                         } else {
1507                                 /*
1508                                  * Add any unexecuted cleanups to
1509                                  * those of the least greater version.
1510                                  */
1511                                 APPENDLIST(least_greater->changed_list,
1512                                            version->changed_list,
1513                                            link);
1514                         }
1515                 } else if (version->serial == rbtdb->least_serial)
1516                         INSIST(EMPTY(version->changed_list));
1517                 UNLINK(rbtdb->open_versions, version, link);
1518         }
1519         least_serial = rbtdb->least_serial;
1520         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1521
1522         /*
1523          * Update the zone's secure status.
1524          */
1525         if (version->writer && commit && !IS_CACHE(rbtdb))
1526                 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
1527
1528         if (cleanup_version != NULL) {
1529                 INSIST(EMPTY(cleanup_version->changed_list));
1530                 isc_mem_put(rbtdb->common.mctx, cleanup_version,
1531                             sizeof(*cleanup_version));
1532         }
1533
1534         if (!EMPTY(cleanup_list)) {
1535                 for (changed = HEAD(cleanup_list);
1536                      changed != NULL;
1537                      changed = next_changed) {
1538                         nodelock_t *lock;
1539
1540                         next_changed = NEXT(changed, link);
1541                         rbtnode = changed->node;
1542                         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
1543
1544                         NODE_LOCK(lock, isc_rwlocktype_write);
1545                         if (rollback)
1546                                 rollback_node(rbtnode, serial);
1547                         decrement_reference(rbtdb, rbtnode, least_serial,
1548                                             isc_rwlocktype_write,
1549                                             isc_rwlocktype_none);
1550                         NODE_UNLOCK(lock, isc_rwlocktype_write);
1551
1552                         isc_mem_put(rbtdb->common.mctx, changed,
1553                                     sizeof(*changed));
1554                 }
1555         }
1556
1557   end:
1558         *versionp = NULL;
1559 }
1560
1561 /*
1562  * Add the necessary magic for the wildcard name 'name'
1563  * to be found in 'rbtdb'.
1564  *
1565  * In order for wildcard matching to work correctly in
1566  * zone_find(), we must ensure that a node for the wildcarding
1567  * level exists in the database, and has its 'find_callback'
1568  * and 'wild' bits set.
1569  *
1570  * E.g. if the wildcard name is "*.sub.example." then we
1571  * must ensure that "sub.example." exists and is marked as
1572  * a wildcard level.
1573  */
1574 static isc_result_t
1575 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
1576         isc_result_t result;
1577         dns_name_t foundname;
1578         dns_offsets_t offsets;
1579         unsigned int n;
1580         dns_rbtnode_t *node = NULL;
1581
1582         dns_name_init(&foundname, offsets);
1583         n = dns_name_countlabels(name);
1584         INSIST(n >= 2);
1585         n--;
1586         dns_name_getlabelsequence(name, 1, n, &foundname);
1587         result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
1588         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
1589                 return (result);
1590         node->find_callback = 1;
1591         node->wild = 1;
1592         return (ISC_R_SUCCESS);
1593 }
1594
1595 static isc_result_t
1596 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
1597         isc_result_t result;
1598         dns_name_t foundname;
1599         dns_offsets_t offsets;
1600         unsigned int n, l, i;
1601
1602         dns_name_init(&foundname, offsets);
1603         n = dns_name_countlabels(name);
1604         l = dns_name_countlabels(&rbtdb->common.origin);
1605         i = l + 1;
1606         while (i < n) {
1607                 dns_rbtnode_t *node = NULL;     /* dummy */
1608                 dns_name_getlabelsequence(name, n - i, i, &foundname);
1609                 if (dns_name_iswildcard(&foundname)) {
1610                         result = add_wildcard_magic(rbtdb, &foundname);
1611                         if (result != ISC_R_SUCCESS)
1612                                 return (result);
1613                         result = dns_rbt_addnode(rbtdb->tree, &foundname,
1614                                                  &node);
1615                         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
1616                                 return (result);
1617                 }
1618                 i++;
1619         }
1620         return (ISC_R_SUCCESS);
1621 }
1622
1623 static isc_result_t
1624 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
1625          dns_dbnode_t **nodep)
1626 {
1627         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1628         dns_rbtnode_t *node = NULL;
1629         dns_name_t nodename;
1630         isc_result_t result;
1631         isc_rwlocktype_t locktype = isc_rwlocktype_read;
1632
1633         REQUIRE(VALID_RBTDB(rbtdb));
1634
1635         dns_name_init(&nodename, NULL);
1636         RWLOCK(&rbtdb->tree_lock, locktype);
1637         result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
1638                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
1639         if (result != ISC_R_SUCCESS) {
1640                 RWUNLOCK(&rbtdb->tree_lock, locktype);
1641                 if (!create) {
1642                         if (result == DNS_R_PARTIALMATCH)
1643                                 result = ISC_R_NOTFOUND;
1644                         return (result);
1645                 }
1646                 /*
1647                  * It would be nice to try to upgrade the lock instead of
1648                  * unlocking then relocking.
1649                  */
1650                 locktype = isc_rwlocktype_write;
1651                 RWLOCK(&rbtdb->tree_lock, locktype);
1652                 node = NULL;
1653                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
1654                 if (result == ISC_R_SUCCESS) {
1655                         dns_rbt_namefromnode(node, &nodename);
1656 #ifdef DNS_RBT_USEHASH
1657                         node->locknum = node->hashval % rbtdb->node_lock_count;
1658 #else
1659                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
1660                                 rbtdb->node_lock_count;
1661 #endif
1662                         add_empty_wildcards(rbtdb, name);
1663
1664                         if (dns_name_iswildcard(name)) {
1665                                 result = add_wildcard_magic(rbtdb, name);
1666                                 if (result != ISC_R_SUCCESS) {
1667                                         RWUNLOCK(&rbtdb->tree_lock, locktype);
1668                                         return (result);
1669                                 }
1670                         }
1671                 } else if (result != ISC_R_EXISTS) {
1672                         RWUNLOCK(&rbtdb->tree_lock, locktype);
1673                         return (result);
1674                 }
1675         }
1676         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1677         new_reference(rbtdb, node);
1678         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1679         RWUNLOCK(&rbtdb->tree_lock, locktype);
1680
1681         *nodep = (dns_dbnode_t *)node;
1682
1683         return (ISC_R_SUCCESS);
1684 }
1685
1686 static isc_result_t
1687 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
1688         rbtdb_search_t *search = arg;
1689         rdatasetheader_t *header, *header_next;
1690         rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
1691         rdatasetheader_t *found;
1692         isc_result_t result;
1693         dns_rbtnode_t *onode;
1694
1695         /*
1696          * We only want to remember the topmost zone cut, since it's the one
1697          * that counts, so we'll just continue if we've already found a
1698          * zonecut.
1699          */
1700         if (search->zonecut != NULL)
1701                 return (DNS_R_CONTINUE);
1702
1703         found = NULL;
1704         result = DNS_R_CONTINUE;
1705         onode = search->rbtdb->origin_node;
1706
1707         NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
1708                   isc_rwlocktype_read);
1709
1710         /*
1711          * Look for an NS or DNAME rdataset active in our version.
1712          */
1713         ns_header = NULL;
1714         dname_header = NULL;
1715         sigdname_header = NULL;
1716         for (header = node->data; header != NULL; header = header_next) {
1717                 header_next = header->next;
1718                 if (header->type == dns_rdatatype_ns ||
1719                     header->type == dns_rdatatype_dname ||
1720                     header->type == RBTDB_RDATATYPE_SIGDNAME) {
1721                         do {
1722                                 if (header->serial <= search->serial &&
1723                                     !IGNORE(header)) {
1724                                         /*
1725                                          * Is this a "this rdataset doesn't
1726                                          * exist" record?
1727                                          */
1728                                         if (NONEXISTENT(header))
1729                                                 header = NULL;
1730                                         break;
1731                                 } else
1732                                         header = header->down;
1733                         } while (header != NULL);
1734                         if (header != NULL) {
1735                                 if (header->type == dns_rdatatype_dname)
1736                                         dname_header = header;
1737                                 else if (header->type == 
1738                                            RBTDB_RDATATYPE_SIGDNAME)
1739                                         sigdname_header = header;
1740                                 else if (node != onode ||
1741                                          IS_STUB(search->rbtdb)) {
1742                                         /*
1743                                          * We've found an NS rdataset that
1744                                          * isn't at the origin node.  We check
1745                                          * that they're not at the origin node,
1746                                          * because otherwise we'd erroneously
1747                                          * treat the zone top as if it were
1748                                          * a delegation.
1749                                          */
1750                                         ns_header = header;
1751                                 }
1752                         }
1753                 }
1754         }
1755
1756         /*
1757          * Did we find anything?
1758          */
1759         if (dname_header != NULL) {
1760                 /*
1761                  * Note that DNAME has precedence over NS if both exist.
1762                  */
1763                 found = dname_header;
1764                 search->zonecut_sigrdataset = sigdname_header;
1765         } else if (ns_header != NULL) {
1766                 found = ns_header;
1767                 search->zonecut_sigrdataset = NULL;
1768         }
1769
1770         if (found != NULL) {
1771                 /*
1772                  * We increment the reference count on node to ensure that
1773                  * search->zonecut_rdataset will still be valid later.
1774                  */
1775                 new_reference(search->rbtdb, node);
1776                 search->zonecut = node;
1777                 search->zonecut_rdataset = found;
1778                 search->need_cleanup = ISC_TRUE;
1779                 /*
1780                  * Since we've found a zonecut, anything beneath it is
1781                  * glue and is not subject to wildcard matching, so we
1782                  * may clear search->wild.
1783                  */
1784                 search->wild = ISC_FALSE;
1785                 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
1786                         /*
1787                          * If the caller does not want to find glue, then
1788                          * this is the best answer and the search should
1789                          * stop now.
1790                          */
1791                         result = DNS_R_PARTIALMATCH;
1792                 } else {
1793                         dns_name_t *zcname;
1794
1795                         /*
1796                          * The search will continue beneath the zone cut.
1797                          * This may or may not be the best match.  In case it
1798                          * is, we need to remember the node name.
1799                          */
1800                         zcname = dns_fixedname_name(&search->zonecut_name);
1801                         RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
1802                                       ISC_R_SUCCESS);
1803                         search->copy_name = ISC_TRUE;
1804                 }
1805         } else {
1806                 /*
1807                  * There is no zonecut at this node which is active in this
1808                  * version.
1809                  *
1810                  * If this is a "wild" node and the caller hasn't disabled
1811                  * wildcard matching, remember that we've seen a wild node
1812                  * in case we need to go searching for wildcard matches
1813                  * later on.
1814                  */
1815                 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
1816                         search->wild = ISC_TRUE;
1817         }
1818
1819         NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
1820                     isc_rwlocktype_read);
1821
1822         return (result);
1823 }
1824
1825 static inline void
1826 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1827               rdatasetheader_t *header, isc_stdtime_t now,
1828               dns_rdataset_t *rdataset)
1829 {
1830         unsigned char *raw;     /* RDATASLAB */
1831
1832         /*
1833          * Caller must be holding the node reader lock.
1834          * XXXJT: technically, we need a writer lock, since we'll increment
1835          * the header count below.  However, since the actual counter value
1836          * doesn't matter, we prioritize performance here.  (We may want to
1837          * use atomic increment when available).
1838          */
1839
1840         if (rdataset == NULL)
1841                 return;
1842
1843         new_reference(rbtdb, node);
1844
1845         INSIST(rdataset->methods == NULL);      /* We must be disassociated. */
1846
1847         rdataset->methods = &rdataset_methods;
1848         rdataset->rdclass = rbtdb->common.rdclass;
1849         rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
1850         rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
1851         rdataset->ttl = header->ttl - now;
1852         rdataset->trust = header->trust;
1853         if (NXDOMAIN(header))
1854                 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
1855         rdataset->private1 = rbtdb;
1856         rdataset->private2 = node;
1857         raw = (unsigned char *)header + sizeof(*header);
1858         rdataset->private3 = raw;
1859         rdataset->count = header->count++;
1860         if (header->count == ISC_UINT32_MAX)
1861                 header->count = 0;
1862
1863         /*
1864          * Reset iterator state.
1865          */
1866         rdataset->privateuint4 = 0;
1867         rdataset->private5 = NULL;
1868
1869         /*
1870          * Add noqname proof.
1871          */
1872         rdataset->private6 = header->noqname;
1873         if (rdataset->private6 != NULL)
1874                 rdataset->attributes |=  DNS_RDATASETATTR_NOQNAME;
1875 }
1876
1877 static inline isc_result_t
1878 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
1879                  dns_name_t *foundname, dns_rdataset_t *rdataset,
1880                  dns_rdataset_t *sigrdataset)
1881 {
1882         isc_result_t result;
1883         dns_name_t *zcname;
1884         rbtdb_rdatatype_t type;
1885         dns_rbtnode_t *node;
1886
1887         /*
1888          * The caller MUST NOT be holding any node locks.
1889          */
1890
1891         node = search->zonecut;
1892         type = search->zonecut_rdataset->type;
1893
1894         /*
1895          * If we have to set foundname, we do it before anything else.
1896          * If we were to set foundname after we had set nodep or bound the
1897          * rdataset, then we'd have to undo that work if dns_name_copy()
1898          * failed.  By setting foundname first, there's nothing to undo if
1899          * we have trouble.
1900          */
1901         if (foundname != NULL && search->copy_name) {
1902                 zcname = dns_fixedname_name(&search->zonecut_name);
1903                 result = dns_name_copy(zcname, foundname, NULL);
1904                 if (result != ISC_R_SUCCESS)
1905                         return (result);
1906         }
1907         if (nodep != NULL) {
1908                 /*
1909                  * Note that we don't have to increment the node's reference
1910                  * count here because we're going to use the reference we
1911                  * already have in the search block.
1912                  */
1913                 *nodep = node;
1914                 search->need_cleanup = ISC_FALSE;
1915         }
1916         if (rdataset != NULL) {
1917                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
1918                           isc_rwlocktype_read);
1919                 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
1920                               search->now, rdataset);
1921                 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
1922                         bind_rdataset(search->rbtdb, node,
1923                                       search->zonecut_sigrdataset,
1924                                       search->now, sigrdataset);
1925                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
1926                             isc_rwlocktype_read);
1927         }
1928
1929         if (type == dns_rdatatype_dname)
1930                 return (DNS_R_DNAME);
1931         return (DNS_R_DELEGATION);
1932 }
1933
1934 static inline isc_boolean_t
1935 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
1936            dns_rbtnode_t *node)
1937 {
1938         unsigned char *raw;     /* RDATASLAB */
1939         unsigned int count, size;
1940         dns_name_t ns_name;
1941         isc_boolean_t valid = ISC_FALSE;
1942         dns_offsets_t offsets;
1943         isc_region_t region;
1944         rdatasetheader_t *header;
1945
1946         /*
1947          * No additional locking is required.
1948          */
1949
1950         /*
1951          * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
1952          * if it occurs at a zone cut, but is not valid below it.
1953          */
1954         if (type == dns_rdatatype_ns) {
1955                 if (node != search->zonecut) {
1956                         return (ISC_FALSE);
1957                 }
1958         } else if (type != dns_rdatatype_a &&
1959                    type != dns_rdatatype_aaaa &&
1960                    type != dns_rdatatype_a6) {
1961                 return (ISC_FALSE);
1962         }
1963
1964         header = search->zonecut_rdataset;
1965         raw = (unsigned char *)header + sizeof(*header);
1966         count = raw[0] * 256 + raw[1];
1967         raw += 2 + (4 * count);
1968
1969         while (count > 0) {
1970                 count--;
1971                 size = raw[0] * 256 + raw[1];
1972                 raw += 4;
1973                 region.base = raw;
1974                 region.length = size;
1975                 raw += size;
1976                 /*
1977                  * XXX Until we have rdata structures, we have no choice but
1978                  * to directly access the rdata format.
1979                  */
1980                 dns_name_init(&ns_name, offsets);
1981                 dns_name_fromregion(&ns_name, &region);
1982                 if (dns_name_compare(&ns_name, name) == 0) {
1983                         valid = ISC_TRUE;
1984                         break;
1985                 }
1986         }
1987
1988         return (valid);
1989 }
1990
1991 static inline isc_boolean_t
1992 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
1993             dns_name_t *name)
1994 {
1995         dns_fixedname_t fnext;
1996         dns_fixedname_t forigin;
1997         dns_name_t *next;
1998         dns_name_t *origin;
1999         dns_name_t prefix;
2000         dns_rbtdb_t *rbtdb;
2001         dns_rbtnode_t *node;
2002         isc_result_t result;
2003         isc_boolean_t answer = ISC_FALSE;
2004         rdatasetheader_t *header;
2005
2006         rbtdb = search->rbtdb;
2007
2008         dns_name_init(&prefix, NULL);
2009         dns_fixedname_init(&fnext);
2010         next = dns_fixedname_name(&fnext);
2011         dns_fixedname_init(&forigin);
2012         origin = dns_fixedname_name(&forigin);
2013
2014         result = dns_rbtnodechain_next(chain, NULL, NULL);
2015         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2016                 node = NULL;
2017                 result = dns_rbtnodechain_current(chain, &prefix,
2018                                                   origin, &node);
2019                 if (result != ISC_R_SUCCESS)
2020                         break;
2021                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2022                           isc_rwlocktype_read);
2023                 for (header = node->data;
2024                      header != NULL;
2025                      header = header->next) {
2026                         if (header->serial <= search->serial &&
2027                             !IGNORE(header) && EXISTS(header))
2028                                 break;
2029                 }
2030                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2031                             isc_rwlocktype_read);
2032                 if (header != NULL)
2033                         break;
2034                 result = dns_rbtnodechain_next(chain, NULL, NULL);
2035         }
2036         if (result == ISC_R_SUCCESS)
2037                 result = dns_name_concatenate(&prefix, origin, next, NULL);
2038         if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
2039                 answer = ISC_TRUE;
2040         return (answer);
2041 }
2042
2043 static inline isc_boolean_t
2044 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2045         dns_fixedname_t fnext;
2046         dns_fixedname_t forigin;
2047         dns_fixedname_t fprev;
2048         dns_name_t *next;
2049         dns_name_t *origin;
2050         dns_name_t *prev;
2051         dns_name_t name;
2052         dns_name_t rname;
2053         dns_name_t tname;
2054         dns_rbtdb_t *rbtdb;
2055         dns_rbtnode_t *node;
2056         dns_rbtnodechain_t chain;
2057         isc_boolean_t check_next = ISC_TRUE;
2058         isc_boolean_t check_prev = ISC_TRUE;
2059         isc_boolean_t answer = ISC_FALSE;
2060         isc_result_t result;
2061         rdatasetheader_t *header;
2062         unsigned int n;
2063
2064         rbtdb = search->rbtdb;
2065
2066         dns_name_init(&name, NULL);
2067         dns_name_init(&tname, NULL);
2068         dns_name_init(&rname, NULL);
2069         dns_fixedname_init(&fnext);
2070         next = dns_fixedname_name(&fnext);
2071         dns_fixedname_init(&fprev);
2072         prev = dns_fixedname_name(&fprev);
2073         dns_fixedname_init(&forigin);
2074         origin = dns_fixedname_name(&forigin);
2075
2076         /*
2077          * Find if qname is at or below a empty node.
2078          * Use our own copy of the chain.
2079          */
2080
2081         chain = search->chain;
2082         do {
2083                 node = NULL;
2084                 result = dns_rbtnodechain_current(&chain, &name,
2085                                                   origin, &node);
2086                 if (result != ISC_R_SUCCESS)
2087                         break;
2088                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2089                           isc_rwlocktype_read);
2090                 for (header = node->data;
2091                      header != NULL;
2092                      header = header->next) {
2093                         if (header->serial <= search->serial &&
2094                             !IGNORE(header) && EXISTS(header))
2095                                 break;
2096                 }
2097                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2098                             isc_rwlocktype_read);
2099                 if (header != NULL)
2100                         break;
2101                 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
2102         } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
2103         if (result == ISC_R_SUCCESS)
2104                 result = dns_name_concatenate(&name, origin, prev, NULL);
2105         if (result != ISC_R_SUCCESS)
2106                 check_prev = ISC_FALSE;
2107
2108         result = dns_rbtnodechain_next(&chain, NULL, NULL);
2109         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2110                 node = NULL;
2111                 result = dns_rbtnodechain_current(&chain, &name,
2112                                                   origin, &node);
2113                 if (result != ISC_R_SUCCESS)
2114                         break;
2115                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2116                           isc_rwlocktype_read);
2117                 for (header = node->data;
2118                      header != NULL;
2119                      header = header->next) {
2120                         if (header->serial <= search->serial &&
2121                             !IGNORE(header) && EXISTS(header))
2122                                 break;
2123                 }
2124                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2125                             isc_rwlocktype_read);
2126                 if (header != NULL)
2127                         break;
2128                 result = dns_rbtnodechain_next(&chain, NULL, NULL);
2129         }
2130         if (result == ISC_R_SUCCESS)
2131                 result = dns_name_concatenate(&name, origin, next, NULL);
2132         if (result != ISC_R_SUCCESS)
2133                 check_next = ISC_FALSE;
2134
2135         dns_name_clone(qname, &rname);
2136
2137         /*
2138          * Remove the wildcard label to find the terminal name.
2139          */
2140         n = dns_name_countlabels(wname);
2141         dns_name_getlabelsequence(wname, 1, n - 1, &tname);
2142
2143         do {
2144                 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
2145                     (check_next && dns_name_issubdomain(next, &rname))) {
2146                         answer = ISC_TRUE;
2147                         break;
2148                 }
2149                 /*
2150                  * Remove the left hand label.
2151                  */
2152                 n = dns_name_countlabels(&rname);
2153                 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
2154         } while (!dns_name_equal(&rname, &tname));
2155         return (answer);
2156 }
2157
2158 static inline isc_result_t
2159 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
2160               dns_name_t *qname)
2161 {
2162         unsigned int i, j;
2163         dns_rbtnode_t *node, *level_node, *wnode;
2164         rdatasetheader_t *header;
2165         isc_result_t result = ISC_R_NOTFOUND;
2166         dns_name_t name;
2167         dns_name_t *wname;
2168         dns_fixedname_t fwname;
2169         dns_rbtdb_t *rbtdb;
2170         isc_boolean_t done, wild, active;
2171         dns_rbtnodechain_t wchain;
2172
2173         /*
2174          * Caller must be holding the tree lock and MUST NOT be holding
2175          * any node locks.
2176          */
2177
2178         /*
2179          * Examine each ancestor level.  If the level's wild bit
2180          * is set, then construct the corresponding wildcard name and
2181          * search for it.  If the wildcard node exists, and is active in
2182          * this version, we're done.  If not, then we next check to see
2183          * if the ancestor is active in this version.  If so, then there
2184          * can be no possible wildcard match and again we're done.  If not,
2185          * continue the search.
2186          */
2187
2188         rbtdb = search->rbtdb;
2189         i = search->chain.level_matches;
2190         done = ISC_FALSE;
2191         node = *nodep;
2192         do {
2193                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2194                           isc_rwlocktype_read);
2195
2196                 /*
2197                  * First we try to figure out if this node is active in
2198                  * the search's version.  We do this now, even though we
2199                  * may not need the information, because it simplifies the
2200                  * locking and code flow.
2201                  */
2202                 for (header = node->data;
2203                      header != NULL;
2204                      header = header->next) {
2205                         if (header->serial <= search->serial &&
2206                             !IGNORE(header) && EXISTS(header))
2207                                 break;
2208                 }
2209                 if (header != NULL)
2210                         active = ISC_TRUE;
2211                 else
2212                         active = ISC_FALSE;
2213
2214                 if (node->wild)
2215                         wild = ISC_TRUE;
2216                 else
2217                         wild = ISC_FALSE;
2218
2219                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2220                             isc_rwlocktype_read);
2221
2222                 if (wild) {
2223                         /*
2224                          * Construct the wildcard name for this level.
2225                          */
2226                         dns_name_init(&name, NULL);
2227                         dns_rbt_namefromnode(node, &name);
2228                         dns_fixedname_init(&fwname);
2229                         wname = dns_fixedname_name(&fwname);
2230                         result = dns_name_concatenate(dns_wildcardname, &name,
2231                                                       wname, NULL);
2232                         j = i;
2233                         while (result == ISC_R_SUCCESS && j != 0) {
2234                                 j--;
2235                                 level_node = search->chain.levels[j];
2236                                 dns_name_init(&name, NULL);
2237                                 dns_rbt_namefromnode(level_node, &name);
2238                                 result = dns_name_concatenate(wname,
2239                                                               &name,
2240                                                               wname,
2241                                                               NULL);
2242                         }
2243                         if (result != ISC_R_SUCCESS)
2244                                 break;
2245
2246                         wnode = NULL;
2247                         dns_rbtnodechain_init(&wchain, NULL);
2248                         result = dns_rbt_findnode(rbtdb->tree, wname,
2249                                                   NULL, &wnode, &wchain,
2250                                                   DNS_RBTFIND_EMPTYDATA,
2251                                                   NULL, NULL);
2252                         if (result == ISC_R_SUCCESS) {
2253                                 nodelock_t *lock;
2254
2255                                 /*
2256                                  * We have found the wildcard node.  If it
2257                                  * is active in the search's version, we're
2258                                  * done.
2259                                  */
2260                                 lock = &rbtdb->node_locks[wnode->locknum].lock;
2261                                 NODE_LOCK(lock, isc_rwlocktype_read);
2262                                 for (header = wnode->data;
2263                                      header != NULL;
2264                                      header = header->next) {
2265                                         if (header->serial <= search->serial &&
2266                                             !IGNORE(header) && EXISTS(header))
2267                                                 break;
2268                                 }
2269                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
2270                                 if (header != NULL ||
2271                                     activeempty(search, &wchain, wname)) {
2272                                         if (activeemtpynode(search, qname,
2273                                                             wname)) {
2274                                                 return (ISC_R_NOTFOUND);
2275                                         }
2276                                         /*
2277                                          * The wildcard node is active!
2278                                          *
2279                                          * Note: result is still ISC_R_SUCCESS
2280                                          * so we don't have to set it.
2281                                          */
2282                                         *nodep = wnode;
2283                                         break;
2284                                 }
2285                         } else if (result != ISC_R_NOTFOUND &&
2286                                    result != DNS_R_PARTIALMATCH) {
2287                                 /*
2288                                  * An error has occurred.  Bail out.
2289                                  */
2290                                 break;
2291                         }
2292                 }
2293
2294                 if (active) {
2295                         /*
2296                          * The level node is active.  Any wildcarding
2297                          * present at higher levels has no
2298                          * effect and we're done.
2299                          */
2300                         result = ISC_R_NOTFOUND;
2301                         break;
2302                 }
2303
2304                 if (i > 0) {
2305                         i--;
2306                         node = search->chain.levels[i];
2307                 } else
2308                         done = ISC_TRUE;
2309         } while (!done);
2310
2311         return (result);
2312 }
2313
2314 static inline isc_result_t
2315 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
2316                   dns_name_t *foundname, dns_rdataset_t *rdataset,
2317                   dns_rdataset_t *sigrdataset, isc_boolean_t need_sig)
2318 {
2319         dns_rbtnode_t *node;
2320         rdatasetheader_t *header, *header_next, *found, *foundsig;
2321         isc_boolean_t empty_node;
2322         isc_result_t result;
2323         dns_fixedname_t fname, forigin;
2324         dns_name_t *name, *origin;
2325
2326         do {
2327                 node = NULL;
2328                 dns_fixedname_init(&fname);
2329                 name = dns_fixedname_name(&fname);
2330                 dns_fixedname_init(&forigin);
2331                 origin = dns_fixedname_name(&forigin);
2332                 result = dns_rbtnodechain_current(&search->chain, name,
2333                                                   origin, &node);
2334                 if (result != ISC_R_SUCCESS)
2335                         return (result);
2336                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2337                           isc_rwlocktype_read);
2338                 found = NULL;
2339                 foundsig = NULL;
2340                 empty_node = ISC_TRUE;
2341                 for (header = node->data;
2342                      header != NULL;
2343                      header = header_next) {
2344                         header_next = header->next;
2345                         /*
2346                          * Look for an active, extant NSEC or RRSIG NSEC.
2347                          */
2348                         do {
2349                                 if (header->serial <= search->serial &&
2350                                     !IGNORE(header)) {
2351                                         /*
2352                                          * Is this a "this rdataset doesn't
2353                                          * exist" record?
2354                                          */
2355                                         if (NONEXISTENT(header))
2356                                                 header = NULL;
2357                                         break;
2358                                 } else
2359                                         header = header->down;
2360                         } while (header != NULL);
2361                         if (header != NULL) {
2362                                 /*
2363                                  * We now know that there is at least one
2364                                  * active rdataset at this node.
2365                                  */
2366                                 empty_node = ISC_FALSE;
2367                                 if (header->type == dns_rdatatype_nsec) {
2368                                         found = header;
2369                                         if (foundsig != NULL)
2370                                                 break;
2371                                 } else if (header->type ==
2372                                            RBTDB_RDATATYPE_SIGNSEC) {
2373                                         foundsig = header;
2374                                         if (found != NULL)
2375                                                 break;
2376                                 }
2377                         }
2378                 }
2379                 if (!empty_node) {
2380                         if (found != NULL &&
2381                             (foundsig != NULL || !need_sig))
2382                         {
2383                                 /*
2384                                  * We've found the right NSEC record.
2385                                  *
2386                                  * Note: for this to really be the right
2387                                  * NSEC record, it's essential that the NSEC
2388                                  * records of any nodes obscured by a zone
2389                                  * cut have been removed; we assume this is
2390                                  * the case.
2391                                  */
2392                                 result = dns_name_concatenate(name, origin,
2393                                                               foundname, NULL);
2394                                 if (result == ISC_R_SUCCESS) {
2395                                         if (nodep != NULL) {
2396                                                 new_reference(search->rbtdb,
2397                                                               node);
2398                                                 *nodep = node;
2399                                         }
2400                                         bind_rdataset(search->rbtdb, node,
2401                                                       found, search->now,
2402                                                       rdataset);
2403                                         if (foundsig != NULL)
2404                                                 bind_rdataset(search->rbtdb,
2405                                                               node,
2406                                                               foundsig,
2407                                                               search->now,
2408                                                               sigrdataset);
2409                                 }
2410                         } else if (found == NULL && foundsig == NULL) {
2411                                 /*
2412                                  * This node is active, but has no NSEC or
2413                                  * RRSIG NSEC.  That means it's glue or
2414                                  * other obscured zone data that isn't
2415                                  * relevant for our search.  Treat the
2416                                  * node as if it were empty and keep looking.
2417                                  */
2418                                 empty_node = ISC_TRUE;
2419                                 result = dns_rbtnodechain_prev(&search->chain,
2420                                                                NULL, NULL);
2421                         } else {
2422                                 /*
2423                                  * We found an active node, but either the
2424                                  * NSEC or the RRSIG NSEC is missing.  This
2425                                  * shouldn't happen.
2426                                  */
2427                                 result = DNS_R_BADDB;
2428                         }
2429                 } else {
2430                         /*
2431                          * This node isn't active.  We've got to keep
2432                          * looking.
2433                          */
2434                         result = dns_rbtnodechain_prev(&search->chain, NULL,
2435                                                        NULL);
2436                 }
2437                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2438                             isc_rwlocktype_read);
2439         } while (empty_node && result == ISC_R_SUCCESS);
2440
2441         /*
2442          * If the result is ISC_R_NOMORE, then we got to the beginning of
2443          * the database and didn't find a NSEC record.  This shouldn't
2444          * happen.
2445          */
2446         if (result == ISC_R_NOMORE)
2447                 result = DNS_R_BADDB;
2448
2449         return (result);
2450 }
2451
2452 static isc_result_t
2453 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
2454           dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
2455           dns_dbnode_t **nodep, dns_name_t *foundname,
2456           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
2457 {
2458         dns_rbtnode_t *node = NULL;
2459         isc_result_t result;
2460         rbtdb_search_t search;
2461         isc_boolean_t cname_ok = ISC_TRUE;
2462         isc_boolean_t close_version = ISC_FALSE;
2463         isc_boolean_t maybe_zonecut = ISC_FALSE;
2464         isc_boolean_t at_zonecut = ISC_FALSE;
2465         isc_boolean_t wild;
2466         isc_boolean_t empty_node;
2467         rdatasetheader_t *header, *header_next, *found, *nsecheader;
2468         rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
2469         rbtdb_rdatatype_t sigtype;
2470         isc_boolean_t active;
2471         dns_rbtnodechain_t chain;
2472         nodelock_t *lock;
2473
2474
2475         search.rbtdb = (dns_rbtdb_t *)db;
2476
2477         REQUIRE(VALID_RBTDB(search.rbtdb));
2478
2479         /*
2480          * We don't care about 'now'.
2481          */
2482         UNUSED(now);
2483
2484         /*
2485          * If the caller didn't supply a version, attach to the current
2486          * version.
2487          */
2488         if (version == NULL) {
2489                 currentversion(db, &version);
2490                 close_version = ISC_TRUE;
2491         }
2492
2493         search.rbtversion = version;
2494         search.serial = search.rbtversion->serial;
2495         search.options = options;
2496         search.copy_name = ISC_FALSE;
2497         search.need_cleanup = ISC_FALSE;
2498         search.wild = ISC_FALSE;
2499         search.zonecut = NULL;
2500         dns_fixedname_init(&search.zonecut_name);
2501         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
2502         search.now = 0;
2503
2504         /*
2505          * 'wild' will be true iff. we've matched a wildcard.
2506          */
2507         wild = ISC_FALSE;
2508
2509         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
2510
2511         /*
2512          * Search down from the root of the tree.  If, while going down, we
2513          * encounter a callback node, zone_zonecut_callback() will search the
2514          * rdatasets at the zone cut for active DNAME or NS rdatasets.
2515          */
2516         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
2517                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
2518                                   zone_zonecut_callback, &search);
2519
2520         if (result == DNS_R_PARTIALMATCH) {
2521         partial_match:
2522                 if (search.zonecut != NULL) {
2523                     result = setup_delegation(&search, nodep, foundname,
2524                                               rdataset, sigrdataset);
2525                     goto tree_exit;
2526                 }
2527
2528                 if (search.wild) {
2529                         /*
2530                          * At least one of the levels in the search chain
2531                          * potentially has a wildcard.  For each such level,
2532                          * we must see if there's a matching wildcard active
2533                          * in the current version.
2534                          */
2535                         result = find_wildcard(&search, &node, name);
2536                         if (result == ISC_R_SUCCESS) {
2537                                 result = dns_name_copy(name, foundname, NULL);
2538                                 if (result != ISC_R_SUCCESS)
2539                                         goto tree_exit;
2540                                 wild = ISC_TRUE;
2541                                 goto found;
2542                         }
2543                         else if (result != ISC_R_NOTFOUND)
2544                                 goto tree_exit;
2545                 }
2546
2547                 chain = search.chain;
2548                 active = activeempty(&search, &chain, name);
2549
2550                 /*
2551                  * If we're here, then the name does not exist, is not
2552                  * beneath a zonecut, and there's no matching wildcard.
2553                  */
2554                 if (search.rbtdb->secure ||
2555                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
2556                 {
2557                         result = find_closest_nsec(&search, nodep, foundname,
2558                                                   rdataset, sigrdataset,
2559                                                   search.rbtdb->secure);
2560                         if (result == ISC_R_SUCCESS)
2561                                 result = active ? DNS_R_EMPTYNAME :
2562                                                   DNS_R_NXDOMAIN;
2563                 } else
2564                         result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
2565                 goto tree_exit;
2566         } else if (result != ISC_R_SUCCESS)
2567                 goto tree_exit;
2568
2569  found:
2570         /*
2571          * We have found a node whose name is the desired name, or we
2572          * have matched a wildcard.
2573          */
2574
2575         if (search.zonecut != NULL) {
2576                 /*
2577                  * If we're beneath a zone cut, we don't want to look for
2578                  * CNAMEs because they're not legitimate zone glue.
2579                  */
2580                 cname_ok = ISC_FALSE;
2581         } else {
2582                 /*
2583                  * The node may be a zone cut itself.  If it might be one,
2584                  * make sure we check for it later.
2585                  */
2586                 if (node->find_callback &&
2587                     (node != search.rbtdb->origin_node ||
2588                      IS_STUB(search.rbtdb)) &&
2589                     !dns_rdatatype_atparent(type))
2590                         maybe_zonecut = ISC_TRUE;
2591         }
2592
2593         /*
2594          * Certain DNSSEC types are not subject to CNAME matching
2595          * (RFC4035, section 2.5 and RFC3007).
2596          *
2597          * We don't check for RRSIG, because we don't store RRSIG records
2598          * directly.
2599          */
2600         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
2601                 cname_ok = ISC_FALSE;
2602
2603         /*
2604          * We now go looking for rdata...
2605          */
2606
2607         NODE_LOCK(&(search.rbtdb->node_locks[node->locknum].lock),
2608                   isc_rwlocktype_read);
2609
2610         found = NULL;
2611         foundsig = NULL;
2612         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
2613         nsecheader = NULL;
2614         nsecsig = NULL;
2615         cnamesig = NULL;
2616         empty_node = ISC_TRUE;
2617         for (header = node->data; header != NULL; header = header_next) {
2618                 header_next = header->next;
2619                 /*
2620                  * Look for an active, extant rdataset.
2621                  */
2622                 do {
2623                         if (header->serial <= search.serial &&
2624                             !IGNORE(header)) {
2625                                 /*
2626                                  * Is this a "this rdataset doesn't
2627                                  * exist" record?
2628                                  */
2629                                 if (NONEXISTENT(header))
2630                                         header = NULL;
2631                                 break;
2632                         } else
2633                                 header = header->down;
2634                 } while (header != NULL);
2635                 if (header != NULL) {
2636                         /*
2637                          * We now know that there is at least one active
2638                          * rdataset at this node.
2639                          */
2640                         empty_node = ISC_FALSE;
2641
2642                         /*
2643                          * Do special zone cut handling, if requested.
2644                          */
2645                         if (maybe_zonecut &&
2646                             header->type == dns_rdatatype_ns) {
2647                                 /*
2648                                  * We increment the reference count on node to
2649                                  * ensure that search->zonecut_rdataset will
2650                                  * still be valid later.
2651                                  */
2652                                 new_reference(search.rbtdb, node);
2653                                 search.zonecut = node;
2654                                 search.zonecut_rdataset = header;
2655                                 search.zonecut_sigrdataset = NULL;
2656                                 search.need_cleanup = ISC_TRUE;
2657                                 maybe_zonecut = ISC_FALSE;
2658                                 at_zonecut = ISC_TRUE;
2659                                 /*
2660                                  * It is not clear if KEY should still be
2661                                  * allowed at the parent side of the zone
2662                                  * cut or not.  It is needed for RFC3007
2663                                  * validated updates.
2664                                  */
2665                                 if ((search.options & DNS_DBFIND_GLUEOK) == 0
2666                                     && type != dns_rdatatype_nsec
2667                                     && type != dns_rdatatype_key) {
2668                                         /*
2669                                          * Glue is not OK, but any answer we
2670                                          * could return would be glue.  Return
2671                                          * the delegation.
2672                                          */
2673                                         found = NULL;
2674                                         break;
2675                                 }
2676                                 if (found != NULL && foundsig != NULL)
2677                                         break;
2678                         }
2679
2680                         /*
2681                          * If we found a type we were looking for,
2682                          * remember it.
2683                          */
2684                         if (header->type == type ||
2685                             type == dns_rdatatype_any ||
2686                             (header->type == dns_rdatatype_cname &&
2687                              cname_ok)) {
2688                                 /*
2689                                  * We've found the answer!
2690                                  */
2691                                 found = header;
2692                                 if (header->type == dns_rdatatype_cname &&
2693                                     cname_ok) {
2694                                         /*
2695                                          * We may be finding a CNAME instead
2696                                          * of the desired type.
2697                                          *
2698                                          * If we've already got the CNAME RRSIG,
2699                                          * use it, otherwise change sigtype
2700                                          * so that we find it.
2701                                          */
2702                                         if (cnamesig != NULL)
2703                                                 foundsig = cnamesig;
2704                                         else
2705                                                 sigtype =
2706                                                     RBTDB_RDATATYPE_SIGCNAME;
2707                                 }
2708                                 /*
2709                                  * If we've got all we need, end the search.
2710                                  */
2711                                 if (!maybe_zonecut && foundsig != NULL)
2712                                         break;
2713                         } else if (header->type == sigtype) {
2714                                 /*
2715                                  * We've found the RRSIG rdataset for our
2716                                  * target type.  Remember it.
2717                                  */
2718                                 foundsig = header;
2719                                 /*
2720                                  * If we've got all we need, end the search.
2721                                  */
2722                                 if (!maybe_zonecut && found != NULL)
2723                                         break;
2724                         } else if (header->type == dns_rdatatype_nsec) {
2725                                 /*
2726                                  * Remember a NSEC rdataset even if we're
2727                                  * not specifically looking for it, because
2728                                  * we might need it later.
2729                                  */
2730                                 nsecheader = header;
2731                         } else if (header->type == RBTDB_RDATATYPE_SIGNSEC) {
2732                                 /*
2733                                  * If we need the NSEC rdataset, we'll also
2734                                  * need its signature.
2735                                  */
2736                                 nsecsig = header;
2737                         } else if (cname_ok &&
2738                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
2739                                 /*
2740                                  * If we get a CNAME match, we'll also need
2741                                  * its signature.
2742                                  */
2743                                 cnamesig = header;
2744                         }
2745                 }
2746         }
2747
2748         if (empty_node) {
2749                 /*
2750                  * We have an exact match for the name, but there are no
2751                  * active rdatasets in the desired version.  That means that
2752                  * this node doesn't exist in the desired version, and that
2753                  * we really have a partial match.
2754                  */
2755                 if (!wild) {
2756                         lock = &search.rbtdb->node_locks[node->locknum].lock;
2757                         NODE_UNLOCK(lock, isc_rwlocktype_read);
2758                         goto partial_match;
2759                 }
2760         }
2761
2762         /*
2763          * If we didn't find what we were looking for...
2764          */
2765         if (found == NULL) {
2766                 if (search.zonecut != NULL) {
2767                         /*
2768                          * We were trying to find glue at a node beneath a
2769                          * zone cut, but didn't.
2770                          *
2771                          * Return the delegation.
2772                          */
2773                         lock = &search.rbtdb->node_locks[node->locknum].lock;
2774                         NODE_UNLOCK(lock, isc_rwlocktype_read);
2775                         result = setup_delegation(&search, nodep, foundname,
2776                                                   rdataset, sigrdataset);
2777                         goto tree_exit;
2778                 }
2779                 /*
2780                  * The desired type doesn't exist.
2781                  */
2782                 result = DNS_R_NXRRSET;
2783                 if (search.rbtdb->secure &&
2784                     (nsecheader == NULL || nsecsig == NULL)) {
2785                         /*
2786                          * The zone is secure but there's no NSEC,
2787                          * or the NSEC has no signature!
2788                          */
2789                         if (!wild) {
2790                                 result = DNS_R_BADDB;
2791                                 goto node_exit;
2792                         }
2793
2794                         lock = &search.rbtdb->node_locks[node->locknum].lock;
2795                         NODE_UNLOCK(lock, isc_rwlocktype_read);
2796                         result = find_closest_nsec(&search, nodep, foundname,
2797                                                    rdataset, sigrdataset,
2798                                                    search.rbtdb->secure);
2799                         if (result == ISC_R_SUCCESS)
2800                                 result = DNS_R_EMPTYWILD;
2801                         goto tree_exit;
2802                 }
2803                 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
2804                     nsecheader == NULL)
2805                 {
2806                         /*
2807                          * There's no NSEC record, and we were told
2808                          * to find one.
2809                          */
2810                         result = DNS_R_BADDB;
2811                         goto node_exit;
2812                 }
2813                 if (nodep != NULL) {
2814                         new_reference(search.rbtdb, node);
2815                         *nodep = node;
2816                 }
2817                 if (search.rbtdb->secure ||
2818                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
2819                 {
2820                         bind_rdataset(search.rbtdb, node, nsecheader,
2821                                       0, rdataset);
2822                         if (nsecsig != NULL)
2823                                 bind_rdataset(search.rbtdb, node,
2824                                               nsecsig, 0, sigrdataset);
2825                 }
2826                 if (wild)
2827                         foundname->attributes |= DNS_NAMEATTR_WILDCARD;
2828                 goto node_exit;
2829         }
2830
2831         /*
2832          * We found what we were looking for, or we found a CNAME.
2833          */
2834
2835         if (type != found->type &&
2836             type != dns_rdatatype_any &&
2837             found->type == dns_rdatatype_cname) {
2838                 /*
2839                  * We weren't doing an ANY query and we found a CNAME instead
2840                  * of the type we were looking for, so we need to indicate
2841                  * that result to the caller.
2842                  */
2843                 result = DNS_R_CNAME;
2844         } else if (search.zonecut != NULL) {
2845                 /*
2846                  * If we're beneath a zone cut, we must indicate that the
2847                  * result is glue, unless we're actually at the zone cut
2848                  * and the type is NSEC or KEY.
2849                  */
2850                 if (search.zonecut == node) {
2851                         /*
2852                          * It is not clear if KEY should still be
2853                          * allowed at the parent side of the zone
2854                          * cut or not.  It is needed for RFC3007
2855                          * validated updates.
2856                          */
2857                         if (type == dns_rdatatype_nsec ||
2858                             type == dns_rdatatype_key)
2859                                 result = ISC_R_SUCCESS;
2860                         else if (type == dns_rdatatype_any)
2861                                 result = DNS_R_ZONECUT;
2862                         else
2863                                 result = DNS_R_GLUE;
2864                 } else
2865                         result = DNS_R_GLUE;
2866                 /*
2867                  * We might have found data that isn't glue, but was occluded
2868                  * by a dynamic update.  If the caller cares about this, they
2869                  * will have told us to validate glue.
2870                  *
2871                  * XXX We should cache the glue validity state!
2872                  */
2873                 if (result == DNS_R_GLUE &&
2874                     (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
2875                     !valid_glue(&search, foundname, type, node)) {
2876                         lock = &search.rbtdb->node_locks[node->locknum].lock;
2877                         NODE_UNLOCK(lock, isc_rwlocktype_read);
2878                         result = setup_delegation(&search, nodep, foundname,
2879                                                   rdataset, sigrdataset);
2880                     goto tree_exit;
2881                 }
2882         } else {
2883                 /*
2884                  * An ordinary successful query!
2885                  */
2886                 result = ISC_R_SUCCESS;
2887         }
2888
2889         if (nodep != NULL) {
2890                 if (!at_zonecut)
2891                         new_reference(search.rbtdb, node);
2892                 else
2893                         search.need_cleanup = ISC_FALSE;
2894                 *nodep = node;
2895         }
2896
2897         if (type != dns_rdatatype_any) {
2898                 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
2899                 if (foundsig != NULL)
2900                         bind_rdataset(search.rbtdb, node, foundsig, 0,
2901                                       sigrdataset);
2902         }
2903
2904         if (wild)
2905                 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
2906
2907  node_exit:
2908         NODE_UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock),
2909                     isc_rwlocktype_read);
2910
2911  tree_exit:
2912         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
2913
2914         /*
2915          * If we found a zonecut but aren't going to use it, we have to
2916          * let go of it.
2917          */
2918         if (search.need_cleanup) {
2919                 node = search.zonecut;
2920                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
2921
2922                 NODE_LOCK(lock, isc_rwlocktype_read);
2923                 decrement_reference(search.rbtdb, node, 0,
2924                                     isc_rwlocktype_read, isc_rwlocktype_none);
2925                 NODE_UNLOCK(lock, isc_rwlocktype_read);
2926         }
2927
2928         if (close_version)
2929                 closeversion(db, &version, ISC_FALSE);
2930
2931         dns_rbtnodechain_reset(&search.chain);
2932
2933         return (result);
2934 }
2935
2936 static isc_result_t
2937 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
2938                  isc_stdtime_t now, dns_dbnode_t **nodep,
2939                  dns_name_t *foundname,
2940                  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
2941 {
2942         UNUSED(db);
2943         UNUSED(name);
2944         UNUSED(options);
2945         UNUSED(now);
2946         UNUSED(nodep);
2947         UNUSED(foundname);
2948         UNUSED(rdataset);
2949         UNUSED(sigrdataset);
2950
2951         FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
2952
2953         return (ISC_R_NOTIMPLEMENTED);
2954 }
2955
2956 static isc_result_t
2957 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2958         rbtdb_search_t *search = arg;
2959         rdatasetheader_t *header, *header_prev, *header_next;
2960         rdatasetheader_t *dname_header, *sigdname_header;
2961         isc_result_t result;
2962         nodelock_t *lock;
2963         isc_rwlocktype_t locktype;
2964
2965         /* XXX comment */
2966
2967         REQUIRE(search->zonecut == NULL);
2968
2969         /*
2970          * Keep compiler silent.
2971          */
2972         UNUSED(name);
2973
2974         lock = &(search->rbtdb->node_locks[node->locknum].lock);
2975         locktype = isc_rwlocktype_read; 
2976         NODE_LOCK(lock, locktype);
2977
2978         /*
2979          * Look for a DNAME or RRSIG DNAME rdataset.
2980          */
2981         dname_header = NULL;
2982         sigdname_header = NULL;
2983         header_prev = NULL;
2984         for (header = node->data; header != NULL; header = header_next) {
2985                 header_next = header->next;
2986                 if (header->ttl <= search->now) {
2987                         /*
2988                          * This rdataset is stale.  If no one else is
2989                          * using the node, we can clean it up right
2990                          * now, otherwise we mark it as stale, and
2991                          * the node as dirty, so it will get cleaned
2992                          * up later.
2993                          */
2994                         if ((header->ttl <= search->now - RBTDB_VIRTUAL) &&
2995                             (locktype == isc_rwlocktype_write ||
2996                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
2997                                 /*
2998                                  * We update the node's status only when we
2999                                  * can get write access; otherwise, we leave
3000                                  * others to this work.  Periodical cleaning
3001                                  * will eventually take the job as the last
3002                                  * resort.
3003                                  * We won't downgrade the lock, since other
3004                                  * rdatasets are probably stale, too. 
3005                                  */
3006                                 locktype = isc_rwlocktype_write;
3007
3008                                 if (dns_rbtnode_refcurrent(node) == 0) {
3009                                         isc_mem_t *mctx;
3010
3011                                         /*
3012                                          * header->down can be non-NULL if the
3013                                          * refcount has just decremented to 0
3014                                          * but decrement_reference() has not
3015                                          * performed clean_cache_node(), in
3016                                          * which case we need to purge the
3017                                          * stale headers first.
3018                                          */
3019                                         mctx = search->rbtdb->common.mctx;
3020                                         clean_stale_headers(mctx, header);
3021                                         if (header_prev != NULL)
3022                                                 header_prev->next =
3023                                                         header->next;
3024                                         else
3025                                                 node->data = header->next;
3026                                         free_rdataset(mctx, header);
3027                                 } else {
3028                                         header->attributes |=
3029                                                 RDATASET_ATTR_STALE;
3030                                         node->dirty = 1;
3031                                         header_prev = header;
3032                                 }
3033                         } else
3034                                 header_prev = header;
3035                 } else if (header->type == dns_rdatatype_dname &&
3036                            EXISTS(header)) {
3037                         dname_header = header;
3038                         header_prev = header;
3039                 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
3040                          EXISTS(header)) {
3041                         sigdname_header = header;
3042                         header_prev = header;
3043                 } else
3044                         header_prev = header;
3045         }
3046
3047         if (dname_header != NULL &&
3048             (dname_header->trust != dns_trust_pending ||
3049              (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
3050                 /*
3051                  * We increment the reference count on node to ensure that
3052                  * search->zonecut_rdataset will still be valid later.
3053                  */
3054                 new_reference(search->rbtdb, node);
3055                 search->zonecut = node;
3056                 search->zonecut_rdataset = dname_header;
3057                 search->zonecut_sigrdataset = sigdname_header;
3058                 search->need_cleanup = ISC_TRUE;
3059                 result = DNS_R_PARTIALMATCH;
3060         } else
3061                 result = DNS_R_CONTINUE;
3062
3063         NODE_UNLOCK(lock, locktype);
3064
3065         return (result);
3066 }
3067
3068 static inline isc_result_t
3069 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
3070                      dns_dbnode_t **nodep, dns_name_t *foundname,
3071                      dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3072 {
3073         unsigned int i;
3074         dns_rbtnode_t *level_node;
3075         rdatasetheader_t *header, *header_prev, *header_next;
3076         rdatasetheader_t *found, *foundsig;
3077         isc_result_t result = ISC_R_NOTFOUND;
3078         dns_name_t name;
3079         dns_rbtdb_t *rbtdb;
3080         isc_boolean_t done;
3081         nodelock_t *lock;
3082         isc_rwlocktype_t locktype;
3083
3084         /*
3085          * Caller must be holding the tree lock.
3086          */
3087
3088         rbtdb = search->rbtdb;
3089         i = search->chain.level_matches;
3090         done = ISC_FALSE;
3091         do {
3092                 locktype = isc_rwlocktype_read;
3093                 lock = &rbtdb->node_locks[node->locknum].lock;
3094                 NODE_LOCK(lock, locktype);
3095
3096                 /*
3097                  * Look for NS and RRSIG NS rdatasets.
3098                  */
3099                 found = NULL;
3100                 foundsig = NULL;
3101                 header_prev = NULL;
3102                 for (header = node->data;
3103                      header != NULL;
3104                      header = header_next) {
3105                         header_next = header->next;
3106                         if (header->ttl <= search->now) {
3107                                 /*
3108                                  * This rdataset is stale.  If no one else is
3109                                  * using the node, we can clean it up right
3110                                  * now, otherwise we mark it as stale, and
3111                                  * the node as dirty, so it will get cleaned
3112                                  * up later.
3113                                  */
3114                                 if ((header->ttl <= search->now -
3115                                                     RBTDB_VIRTUAL) &&
3116                                     (locktype == isc_rwlocktype_write ||
3117                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3118                                         /*
3119                                          * We update the node's status only
3120                                          * when we can get write access.
3121                                          */
3122                                         locktype = isc_rwlocktype_write;
3123
3124                                         if (dns_rbtnode_refcurrent(node)
3125                                             == 0) {
3126                                                 isc_mem_t *m;
3127
3128                                                 m = search->rbtdb->common.mctx;
3129                                                 clean_stale_headers(m, header);
3130                                                 if (header_prev != NULL)
3131                                                         header_prev->next =
3132                                                                 header->next;
3133                                                 else
3134                                                         node->data =
3135                                                                 header->next;
3136                                                 free_rdataset(m, header);
3137                                         } else {
3138                                                 header->attributes |=
3139                                                         RDATASET_ATTR_STALE;
3140                                                 node->dirty = 1;
3141                                                 header_prev = header;
3142                                         }
3143                                 } else
3144                                         header_prev = header;
3145                         } else if (EXISTS(header)) {
3146                                 /*
3147                                  * We've found an extant rdataset.  See if
3148                                  * we're interested in it.
3149                                  */
3150                                 if (header->type == dns_rdatatype_ns) {
3151                                         found = header;
3152                                         if (foundsig != NULL)
3153                                                 break;
3154                                 } else if (header->type ==
3155                                            RBTDB_RDATATYPE_SIGNS) {
3156                                         foundsig = header;
3157                                         if (found != NULL)
3158                                                 break;
3159                                 }
3160                                 header_prev = header;
3161                         } else
3162                                 header_prev = header;
3163                 }
3164
3165                 if (found != NULL) {
3166                         /*
3167                          * If we have to set foundname, we do it before
3168                          * anything else.  If we were to set foundname after
3169                          * we had set nodep or bound the rdataset, then we'd
3170                          * have to undo that work if dns_name_concatenate()
3171                          * failed.  By setting foundname first, there's
3172                          * nothing to undo if we have trouble.
3173                          */
3174                         if (foundname != NULL) {
3175                                 dns_name_init(&name, NULL);
3176                                 dns_rbt_namefromnode(node, &name);
3177                                 result = dns_name_copy(&name, foundname, NULL);
3178                                 while (result == ISC_R_SUCCESS && i > 0) {
3179                                         i--;
3180                                         level_node = search->chain.levels[i];
3181                                         dns_name_init(&name, NULL);
3182                                         dns_rbt_namefromnode(level_node,
3183                                                              &name);
3184                                         result =
3185                                                 dns_name_concatenate(foundname,
3186                                                                      &name,
3187                                                                      foundname,
3188                                                                      NULL);
3189                                 }
3190                                 if (result != ISC_R_SUCCESS) {
3191                                         *nodep = NULL;
3192                                         goto node_exit;
3193                                 }
3194                         }
3195                         result = DNS_R_DELEGATION;
3196                         if (nodep != NULL) {
3197                                 new_reference(search->rbtdb, node);
3198                                 *nodep = node;
3199                         }
3200                         bind_rdataset(search->rbtdb, node, found, search->now,
3201                                       rdataset);
3202                         if (foundsig != NULL)
3203                                 bind_rdataset(search->rbtdb, node, foundsig,
3204                                               search->now, sigrdataset);
3205                 }
3206
3207         node_exit:
3208                 NODE_UNLOCK(lock, locktype);
3209
3210                 if (found == NULL && i > 0) {
3211                         i--;
3212                         node = search->chain.levels[i];
3213                 } else
3214                         done = ISC_TRUE;
3215
3216         } while (!done);
3217
3218         return (result);
3219 }
3220
3221 static isc_result_t
3222 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3223                   isc_stdtime_t now, dns_name_t *foundname,
3224                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3225 {
3226         dns_rbtnode_t *node;
3227         rdatasetheader_t *header, *header_next, *header_prev;
3228         rdatasetheader_t *found, *foundsig;
3229         isc_boolean_t empty_node;
3230         isc_result_t result;
3231         dns_fixedname_t fname, forigin;
3232         dns_name_t *name, *origin;
3233         rbtdb_rdatatype_t matchtype, sigmatchtype;
3234         nodelock_t *lock;
3235         isc_rwlocktype_t locktype;
3236
3237         matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
3238         sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
3239                                              dns_rdatatype_nsec);
3240         
3241         do {
3242                 node = NULL;
3243                 dns_fixedname_init(&fname);
3244                 name = dns_fixedname_name(&fname);
3245                 dns_fixedname_init(&forigin);
3246                 origin = dns_fixedname_name(&forigin);
3247                 result = dns_rbtnodechain_current(&search->chain, name,
3248                                                   origin, &node);
3249                 if (result != ISC_R_SUCCESS)
3250                         return (result);
3251                 locktype = isc_rwlocktype_read;
3252                 lock = &(search->rbtdb->node_locks[node->locknum].lock);
3253                 NODE_LOCK(lock, locktype);
3254                 found = NULL;
3255                 foundsig = NULL;
3256                 empty_node = ISC_TRUE;
3257                 header_prev = NULL;
3258                 for (header = node->data;
3259                      header != NULL;
3260                      header = header_next) {
3261                         header_next = header->next;
3262                         if (header->ttl <= now) {
3263                                 /*
3264                                  * This rdataset is stale.  If no one else is
3265                                  * using the node, we can clean it up right
3266                                  * now, otherwise we mark it as stale, and the
3267                                  * node as dirty, so it will get cleaned up 
3268                                  * later.
3269                                  */
3270                                 if ((header->ttl <= now - RBTDB_VIRTUAL) &&
3271                                     (locktype == isc_rwlocktype_write ||
3272                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3273                                         /*
3274                                          * We update the node's status only
3275                                          * when we can get write access.
3276                                          */
3277                                         locktype = isc_rwlocktype_write;
3278
3279                                         if (dns_rbtnode_refcurrent(node)
3280                                             == 0) {
3281                                                 isc_mem_t *m;
3282
3283                                                 m = search->rbtdb->common.mctx;
3284                                                 clean_stale_headers(m, header);
3285                                                 if (header_prev != NULL)
3286                                                         header_prev->next =
3287                                                                 header->next;
3288                                                 else
3289                                                         node->data = header->next;
3290                                                 free_rdataset(m, header);
3291                                         } else {
3292                                                 header->attributes |=
3293                                                         RDATASET_ATTR_STALE;
3294                                                 node->dirty = 1;
3295                                                 header_prev = header;
3296                                         }
3297                                 } else
3298                                         header_prev = header;
3299                                 continue;
3300                         }
3301                         if (NONEXISTENT(header) || NXDOMAIN(header)) {
3302                                 header_prev = header;
3303                                 continue;
3304                         }
3305                         empty_node = ISC_FALSE;
3306                         if (header->type == matchtype)
3307                                 found = header;
3308                         else if (header->type == sigmatchtype)
3309                                 foundsig = header;
3310                         header_prev = header;
3311                 }
3312                 if (found != NULL) {
3313                         result = dns_name_concatenate(name, origin,
3314                                                       foundname, NULL);
3315                         if (result != ISC_R_SUCCESS)
3316                                 goto unlock_node;
3317                         bind_rdataset(search->rbtdb, node, found,
3318                                       now, rdataset);
3319                         if (foundsig != NULL)
3320                                 bind_rdataset(search->rbtdb, node, foundsig,
3321                                               now, sigrdataset);
3322                         new_reference(search->rbtdb, node);
3323                         *nodep = node;
3324                         result = DNS_R_COVERINGNSEC;
3325                 } else if (!empty_node) {
3326                         result = ISC_R_NOTFOUND;
3327                 }else
3328                         result = dns_rbtnodechain_prev(&search->chain, NULL,
3329                                                        NULL);
3330  unlock_node:
3331                 NODE_UNLOCK(lock, locktype);
3332         } while (empty_node && result == ISC_R_SUCCESS);
3333         return (result);
3334 }
3335
3336 static isc_result_t
3337 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3338            dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3339            dns_dbnode_t **nodep, dns_name_t *foundname,
3340            dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3341 {
3342         dns_rbtnode_t *node = NULL;
3343         isc_result_t result;
3344         rbtdb_search_t search;
3345         isc_boolean_t cname_ok = ISC_TRUE;
3346         isc_boolean_t empty_node;
3347         nodelock_t *lock;
3348         isc_rwlocktype_t locktype;
3349         rdatasetheader_t *header, *header_prev, *header_next;
3350         rdatasetheader_t *found, *nsheader;
3351         rdatasetheader_t *foundsig, *nssig, *cnamesig;
3352         rbtdb_rdatatype_t sigtype, negtype;
3353
3354         UNUSED(version);
3355
3356         search.rbtdb = (dns_rbtdb_t *)db;
3357
3358         REQUIRE(VALID_RBTDB(search.rbtdb));
3359         REQUIRE(version == NULL);
3360
3361         if (now == 0)
3362                 isc_stdtime_get(&now);
3363
3364         search.rbtversion = NULL;
3365         search.serial = 1;
3366         search.options = options;
3367         search.copy_name = ISC_FALSE;
3368         search.need_cleanup = ISC_FALSE;
3369         search.wild = ISC_FALSE;
3370         search.zonecut = NULL;
3371         dns_fixedname_init(&search.zonecut_name);
3372         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3373         search.now = now;
3374
3375         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3376
3377         /*
3378          * Search down from the root of the tree.  If, while going down, we
3379          * encounter a callback node, cache_zonecut_callback() will search the
3380          * rdatasets at the zone cut for a DNAME rdataset.
3381          */
3382         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
3383                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
3384                                   cache_zonecut_callback, &search);
3385
3386         if (result == DNS_R_PARTIALMATCH) {
3387                 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
3388                         result = find_coveringnsec(&search, nodep, now,
3389                                                    foundname, rdataset,
3390                                                    sigrdataset);
3391                         if (result == DNS_R_COVERINGNSEC)
3392                                 goto tree_exit;
3393                 }
3394                 if (search.zonecut != NULL) {
3395                     result = setup_delegation(&search, nodep, foundname,
3396                                               rdataset, sigrdataset);
3397                     goto tree_exit;
3398                 } else {
3399                 find_ns:
3400                         result = find_deepest_zonecut(&search, node, nodep,
3401                                                       foundname, rdataset,
3402                                                       sigrdataset);
3403                         goto tree_exit;
3404                 }
3405         } else if (result != ISC_R_SUCCESS)
3406                 goto tree_exit;
3407
3408         /*
3409          * Certain DNSSEC types are not subject to CNAME matching
3410          * (RFC4035, section 2.5 and RFC3007).
3411          *
3412          * We don't check for RRSIG, because we don't store RRSIG records
3413          * directly.
3414          */
3415         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3416                 cname_ok = ISC_FALSE;
3417
3418         /*
3419          * We now go looking for rdata...
3420          */
3421
3422         lock = &(search.rbtdb->node_locks[node->locknum].lock);
3423         locktype = isc_rwlocktype_read;
3424         NODE_LOCK(lock, locktype);
3425
3426         found = NULL;
3427         foundsig = NULL;
3428         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3429         negtype = RBTDB_RDATATYPE_VALUE(0, type);
3430         nsheader = NULL;
3431         nssig = NULL;
3432         cnamesig = NULL;
3433         empty_node = ISC_TRUE;
3434         header_prev = NULL;
3435         for (header = node->data; header != NULL; header = header_next) {
3436                 header_next = header->next;
3437                 if (header->ttl <= now) {
3438                         /*
3439                          * This rdataset is stale.  If no one else is using the
3440                          * node, we can clean it up right now, otherwise we
3441                          * mark it as stale, and the node as dirty, so it will
3442                          * get cleaned up later.
3443                          */
3444                         if ((header->ttl <= now - RBTDB_VIRTUAL) &&
3445                             (locktype == isc_rwlocktype_write ||
3446                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3447                                 /*
3448                                  * We update the node's status only when we
3449                                  * can get write access.
3450                                  */
3451                                 locktype = isc_rwlocktype_write;
3452
3453                                 if (dns_rbtnode_refcurrent(node) == 0) {
3454                                         isc_mem_t *mctx;
3455
3456                                         mctx = search.rbtdb->common.mctx;
3457                                         clean_stale_headers(mctx, header);
3458                                         if (header_prev != NULL)
3459                                                 header_prev->next =
3460                                                         header->next;
3461                                         else
3462                                                 node->data = header->next;
3463                                         free_rdataset(mctx, header);
3464                                 } else {
3465                                         header->attributes |=
3466                                                 RDATASET_ATTR_STALE;
3467                                         node->dirty = 1;
3468                                         header_prev = header;
3469                                 }
3470                         } else
3471                                 header_prev = header;
3472                 } else if (EXISTS(header)) {
3473                         /*
3474                          * We now know that there is at least one active
3475                          * non-stale rdataset at this node.
3476                          */
3477                         empty_node = ISC_FALSE;
3478
3479                         /*
3480                          * If we found a type we were looking for, remember
3481                          * it.
3482                          */
3483                         if (header->type == type ||
3484                             (type == dns_rdatatype_any &&
3485                              RBTDB_RDATATYPE_BASE(header->type) != 0) ||
3486                             (cname_ok && header->type ==
3487                              dns_rdatatype_cname)) {
3488                                 /*
3489                                  * We've found the answer.
3490                                  */
3491                                 found = header;
3492                                 if (header->type == dns_rdatatype_cname &&
3493                                     cname_ok &&
3494                                     cnamesig != NULL) {
3495                                         /*
3496                                          * If we've already got the CNAME RRSIG,
3497                                          * use it, otherwise change sigtype
3498                                          * so that we find it.
3499                                          */
3500                                         if (cnamesig != NULL)
3501                                                 foundsig = cnamesig;
3502                                         else
3503                                                 sigtype =
3504                                                     RBTDB_RDATATYPE_SIGCNAME;
3505                                         foundsig = cnamesig;
3506                                 }
3507                         } else if (header->type == sigtype) {
3508                                 /*
3509                                  * We've found the RRSIG rdataset for our
3510                                  * target type.  Remember it.
3511                                  */
3512                                 foundsig = header;
3513                         } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
3514                                    header->type == negtype) {
3515                                 /*
3516                                  * We've found a negative cache entry.
3517                                  */
3518                                 found = header;
3519                         } else if (header->type == dns_rdatatype_ns) {
3520                                 /*
3521                                  * Remember a NS rdataset even if we're
3522                                  * not specifically looking for it, because
3523                                  * we might need it later.
3524                                  */
3525                                 nsheader = header;
3526                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
3527                                 /*
3528                                  * If we need the NS rdataset, we'll also
3529                                  * need its signature.
3530                                  */
3531                                 nssig = header;
3532                         } else if (cname_ok &&
3533                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
3534                                 /*
3535                                  * If we get a CNAME match, we'll also need
3536                                  * its signature.
3537                                  */
3538                                 cnamesig = header;
3539                         }
3540                         header_prev = header;
3541                 } else
3542                         header_prev = header;
3543         }
3544
3545         if (empty_node) {
3546                 /*
3547                  * We have an exact match for the name, but there are no
3548                  * extant rdatasets.  That means that this node doesn't
3549                  * meaningfully exist, and that we really have a partial match.
3550                  */
3551                 NODE_UNLOCK(lock, locktype);
3552                 goto find_ns;
3553         }
3554
3555         /*
3556          * If we didn't find what we were looking for...
3557          */
3558         if (found == NULL ||
3559             (found->trust == dns_trust_glue &&
3560              ((options & DNS_DBFIND_GLUEOK) == 0)) ||
3561             (found->trust == dns_trust_pending &&
3562              ((options & DNS_DBFIND_PENDINGOK) == 0))) {
3563                 /*
3564                  * If there is an NS rdataset at this node, then this is the
3565                  * deepest zone cut.
3566                  */
3567                 if (nsheader != NULL) {
3568                         if (nodep != NULL) {
3569                                 new_reference(search.rbtdb, node);
3570                                 *nodep = node;
3571                         }
3572                         bind_rdataset(search.rbtdb, node, nsheader, search.now,
3573                                       rdataset);
3574                         if (nssig != NULL)
3575                                 bind_rdataset(search.rbtdb, node, nssig,
3576                                               search.now, sigrdataset);
3577                         result = DNS_R_DELEGATION;
3578                         goto node_exit;
3579                 }
3580
3581                 /*
3582                  * Go find the deepest zone cut.
3583                  */
3584                 NODE_UNLOCK(lock, locktype);
3585                 goto find_ns;
3586         }
3587
3588         /*
3589          * We found what we were looking for, or we found a CNAME.
3590          */
3591
3592         if (nodep != NULL) {
3593                 new_reference(search.rbtdb, node);
3594                 *nodep = node;
3595         }
3596
3597         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
3598                 /*
3599                  * We found a negative cache entry.
3600                  */
3601                 if (NXDOMAIN(found))
3602                         result = DNS_R_NCACHENXDOMAIN;
3603                 else
3604                         result = DNS_R_NCACHENXRRSET;
3605         } else if (type != found->type &&
3606                    type != dns_rdatatype_any &&
3607                    found->type == dns_rdatatype_cname) {
3608                 /*
3609                  * We weren't doing an ANY query and we found a CNAME instead
3610                  * of the type we were looking for, so we need to indicate
3611                  * that result to the caller.
3612                  */
3613                 result = DNS_R_CNAME;
3614         } else {
3615                 /*
3616                  * An ordinary successful query!
3617                  */
3618                 result = ISC_R_SUCCESS;
3619         }
3620
3621         if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
3622             result == DNS_R_NCACHENXRRSET) {
3623                 bind_rdataset(search.rbtdb, node, found, search.now,
3624                               rdataset);
3625                 if (foundsig != NULL)
3626                         bind_rdataset(search.rbtdb, node, foundsig, search.now,
3627                                       sigrdataset);
3628         }
3629
3630  node_exit:
3631         NODE_UNLOCK(lock, locktype);
3632
3633  tree_exit:
3634         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3635
3636         /*
3637          * If we found a zonecut but aren't going to use it, we have to
3638          * let go of it.
3639          */
3640         if (search.need_cleanup) {
3641                 node = search.zonecut;
3642                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3643
3644                 NODE_LOCK(lock, isc_rwlocktype_read);
3645                 decrement_reference(search.rbtdb, node, 0,
3646                                     isc_rwlocktype_read, isc_rwlocktype_none);
3647                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3648         }
3649
3650         dns_rbtnodechain_reset(&search.chain);
3651
3652         return (result);
3653 }
3654
3655 static isc_result_t
3656 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
3657                   isc_stdtime_t now, dns_dbnode_t **nodep,
3658                   dns_name_t *foundname,
3659                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3660 {
3661         dns_rbtnode_t *node = NULL;
3662         nodelock_t *lock;
3663         isc_result_t result;
3664         rbtdb_search_t search;
3665         rdatasetheader_t *header, *header_prev, *header_next;
3666         rdatasetheader_t *found, *foundsig;
3667         unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
3668         isc_rwlocktype_t locktype;
3669
3670         search.rbtdb = (dns_rbtdb_t *)db;
3671
3672         REQUIRE(VALID_RBTDB(search.rbtdb));
3673
3674         if (now == 0)
3675                 isc_stdtime_get(&now);
3676
3677         search.rbtversion = NULL;
3678         search.serial = 1;
3679         search.options = options;
3680         search.copy_name = ISC_FALSE;
3681         search.need_cleanup = ISC_FALSE;
3682         search.wild = ISC_FALSE;
3683         search.zonecut = NULL;
3684         dns_fixedname_init(&search.zonecut_name);
3685         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3686         search.now = now;
3687
3688         if ((options & DNS_DBFIND_NOEXACT) != 0)
3689                 rbtoptions |= DNS_RBTFIND_NOEXACT;
3690
3691         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3692
3693         /*
3694          * Search down from the root of the tree.
3695          */
3696         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
3697                                   &search.chain, rbtoptions, NULL, &search);
3698
3699         if (result == DNS_R_PARTIALMATCH) {
3700         find_ns:
3701                 result = find_deepest_zonecut(&search, node, nodep, foundname,
3702                                               rdataset, sigrdataset);
3703                 goto tree_exit;
3704         } else if (result != ISC_R_SUCCESS)
3705                 goto tree_exit;
3706
3707         /*
3708          * We now go looking for an NS rdataset at the node.
3709          */
3710
3711         lock = &(search.rbtdb->node_locks[node->locknum].lock);
3712         locktype = isc_rwlocktype_read;
3713         NODE_LOCK(lock, locktype);
3714
3715         found = NULL;
3716         foundsig = NULL;
3717         header_prev = NULL;
3718         for (header = node->data; header != NULL; header = header_next) {
3719                 header_next = header->next;
3720                 if (header->ttl <= now) {
3721                         /*
3722                          * This rdataset is stale.  If no one else is using the
3723                          * node, we can clean it up right now, otherwise we
3724                          * mark it as stale, and the node as dirty, so it will
3725                          * get cleaned up later.
3726                          */
3727                         if ((header->ttl <= now - RBTDB_VIRTUAL) &&
3728                             (locktype == isc_rwlocktype_write ||
3729                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3730                                 /*
3731                                  * We update the node's status only when we
3732                                  * can get write access.
3733                                  */
3734                                 locktype = isc_rwlocktype_write;
3735
3736                                 if (dns_rbtnode_refcurrent(node) == 0) {
3737                                         isc_mem_t *mctx;
3738
3739                                         mctx = search.rbtdb->common.mctx;
3740                                         clean_stale_headers(mctx, header);
3741                                         if (header_prev != NULL)
3742                                                 header_prev->next =
3743                                                         header->next;
3744                                         else
3745                                                 node->data = header->next;
3746                                         free_rdataset(mctx, header);
3747                                 } else {
3748                                         header->attributes |=
3749                                                 RDATASET_ATTR_STALE;
3750                                         node->dirty = 1;
3751                                         header_prev = header;
3752                                 }
3753                         } else
3754                                 header_prev = header;
3755                 } else if (EXISTS(header)) {
3756                         /*
3757                          * If we found a type we were looking for, remember
3758                          * it.
3759                          */
3760                         if (header->type == dns_rdatatype_ns) {
3761                                 /*
3762                                  * Remember a NS rdataset even if we're
3763                                  * not specifically looking for it, because
3764                                  * we might need it later.
3765                                  */
3766                                 found = header;
3767                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
3768                                 /*
3769                                  * If we need the NS rdataset, we'll also
3770                                  * need its signature.
3771                                  */
3772                                 foundsig = header;
3773                         }
3774                         header_prev = header;
3775                 } else
3776                         header_prev = header;
3777         }
3778
3779         if (found == NULL) {
3780                 /*
3781                  * No NS records here.
3782                  */
3783                 NODE_UNLOCK(lock, locktype);
3784                 goto find_ns;
3785         }
3786
3787         if (nodep != NULL) {
3788                 new_reference(search.rbtdb, node);
3789                 *nodep = node;
3790         }
3791
3792         bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
3793         if (foundsig != NULL)
3794                 bind_rdataset(search.rbtdb, node, foundsig, search.now,
3795                               sigrdataset);
3796
3797         NODE_UNLOCK(lock, locktype);
3798
3799  tree_exit:
3800         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3801
3802         INSIST(!search.need_cleanup);
3803
3804         dns_rbtnodechain_reset(&search.chain);
3805
3806         if (result == DNS_R_DELEGATION)
3807                 result = ISC_R_SUCCESS;
3808
3809         return (result);
3810 }
3811
3812 static void
3813 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
3814         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3815         dns_rbtnode_t *node = (dns_rbtnode_t *)source;
3816         unsigned int refs;
3817
3818         REQUIRE(VALID_RBTDB(rbtdb));
3819         REQUIRE(targetp != NULL && *targetp == NULL);
3820
3821         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
3822         dns_rbtnode_refincrement(node, &refs);
3823         INSIST(refs != 0);
3824         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
3825
3826         *targetp = source;
3827 }
3828
3829 static void
3830 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
3831         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3832         dns_rbtnode_t *node;
3833         isc_boolean_t want_free = ISC_FALSE;
3834         isc_boolean_t inactive = ISC_FALSE;
3835         rbtdb_nodelock_t *nodelock;
3836
3837         REQUIRE(VALID_RBTDB(rbtdb));
3838         REQUIRE(targetp != NULL && *targetp != NULL);
3839
3840         node = (dns_rbtnode_t *)(*targetp);
3841         nodelock = &rbtdb->node_locks[node->locknum];
3842
3843         NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
3844
3845         if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
3846                                 isc_rwlocktype_none)) {
3847                 if (isc_refcount_current(&nodelock->references) == 0 &&
3848                     nodelock->exiting) {
3849                         inactive = ISC_TRUE;
3850                 }
3851         }
3852
3853         NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
3854
3855         *targetp = NULL;
3856
3857         if (inactive) {
3858                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
3859                 rbtdb->active--;
3860                 if (rbtdb->active == 0)
3861                         want_free = ISC_TRUE;
3862                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
3863                 if (want_free) {
3864                         char buf[DNS_NAME_FORMATSIZE];
3865                         if (dns_name_dynamic(&rbtdb->common.origin))
3866                                 dns_name_format(&rbtdb->common.origin, buf,
3867                                                 sizeof(buf));
3868                         else
3869                                 strcpy(buf, "<UNKNOWN>");
3870                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
3871                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
3872                                       "calling free_rbtdb(%s)", buf);
3873                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
3874                 }
3875         }
3876 }
3877
3878 static isc_result_t
3879 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
3880         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3881         dns_rbtnode_t *rbtnode = node;
3882         rdatasetheader_t *header;
3883         isc_boolean_t force_expire = ISC_FALSE;
3884         /*
3885          * These are the category and module used by the cache cleaner.
3886          */
3887         isc_boolean_t log = ISC_FALSE;
3888         isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
3889         isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
3890         int level = ISC_LOG_DEBUG(2);
3891         char printname[DNS_NAME_FORMATSIZE];
3892
3893         REQUIRE(VALID_RBTDB(rbtdb));
3894
3895         /*
3896          * Caller must hold a tree lock.
3897          */
3898
3899         if (now == 0)
3900                 isc_stdtime_get(&now);
3901
3902         if (rbtdb->overmem) {
3903                 isc_uint32_t val;
3904
3905                 isc_random_get(&val);
3906                 /*
3907                  * XXXDCL Could stand to have a better policy, like LRU.
3908                  */
3909                 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
3910
3911                 /*
3912                  * Note that 'log' can be true IFF rbtdb->overmem is also true.
3913                  * rbtdb->ovemem can currently only be true for cache databases
3914                  * -- hence all of the "overmem cache" log strings.
3915                  */
3916                 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
3917                 if (log)
3918                         isc_log_write(dns_lctx, category, module, level,
3919                                       "overmem cache: %s %s",
3920                                       force_expire ? "FORCE" : "check",
3921                                       dns_rbt_formatnodename(rbtnode,
3922                                                            printname,
3923                                                            sizeof(printname)));
3924         }
3925
3926         /*
3927          * We may not need write access, but this code path is not performance
3928          * sensitive, so it should be okay to always lock as a writer.
3929          */
3930         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
3931                   isc_rwlocktype_write);
3932
3933         for (header = rbtnode->data; header != NULL; header = header->next)
3934                 if (header->ttl <= now - RBTDB_VIRTUAL) {
3935                         /*
3936                          * We don't check if refcurrent(rbtnode) == 0 and try
3937                          * to free like we do in cache_find(), because
3938                          * refcurrent(rbtnode) must be non-zero.  This is so
3939                          * because 'node' is an argument to the function.
3940                          */
3941                         header->attributes |= RDATASET_ATTR_STALE;
3942                         rbtnode->dirty = 1;
3943                         if (log)
3944                                 isc_log_write(dns_lctx, category, module,
3945                                               level, "overmem cache: stale %s",
3946                                               printname);
3947                 } else if (force_expire) {
3948                         if (! RETAIN(header)) {
3949                                 header->ttl = 0;
3950                                 header->attributes |= RDATASET_ATTR_STALE;
3951                                 rbtnode->dirty = 1;
3952                         } else if (log) {
3953                                 isc_log_write(dns_lctx, category, module,
3954                                               level, "overmem cache: "
3955                                               "reprieve by RETAIN() %s",
3956                                               printname);
3957                         }
3958                 } else if (rbtdb->overmem && log)
3959                         isc_log_write(dns_lctx, category, module, level,
3960                                       "overmem cache: saved %s", printname);
3961
3962         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
3963                     isc_rwlocktype_write);
3964
3965         return (ISC_R_SUCCESS);
3966 }
3967
3968 static void
3969 overmem(dns_db_t *db, isc_boolean_t overmem) {
3970         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3971
3972         if (IS_CACHE(rbtdb)) {
3973                 rbtdb->overmem = overmem;
3974         }
3975 }
3976
3977 static void
3978 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
3979         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3980         dns_rbtnode_t *rbtnode = node;
3981         isc_boolean_t first;
3982
3983         REQUIRE(VALID_RBTDB(rbtdb));
3984
3985         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
3986                   isc_rwlocktype_read);
3987
3988         fprintf(out, "node %p, %u references, locknum = %u\n",
3989                 rbtnode, dns_rbtnode_refcurrent(rbtnode),
3990                 rbtnode->locknum);
3991         if (rbtnode->data != NULL) {
3992                 rdatasetheader_t *current, *top_next;
3993
3994                 for (current = rbtnode->data; current != NULL;
3995                      current = top_next) {
3996                         top_next = current->next;
3997                         first = ISC_TRUE;
3998                         fprintf(out, "\ttype %u", current->type);
3999                         do {
4000                                 if (!first)
4001                                         fprintf(out, "\t");
4002                                 first = ISC_FALSE;
4003                                 fprintf(out,
4004                                         "\tserial = %lu, ttl = %u, "
4005                                         "trust = %u, attributes = %u\n",
4006                                         (unsigned long)current->serial,
4007                                         current->ttl,
4008                                         current->trust,
4009                                         current->attributes);
4010                                 current = current->down;
4011                         } while (current != NULL);
4012                 }
4013         } else
4014                 fprintf(out, "(empty)\n");
4015
4016         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4017                     isc_rwlocktype_read);
4018 }
4019
4020 static isc_result_t
4021 createiterator(dns_db_t *db, isc_boolean_t relative_names,
4022                dns_dbiterator_t **iteratorp)
4023 {
4024         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4025         rbtdb_dbiterator_t *rbtdbiter;
4026
4027         REQUIRE(VALID_RBTDB(rbtdb));
4028
4029         rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
4030         if (rbtdbiter == NULL)
4031                 return (ISC_R_NOMEMORY);
4032
4033         rbtdbiter->common.methods = &dbiterator_methods;
4034         rbtdbiter->common.db = NULL;
4035         dns_db_attach(db, &rbtdbiter->common.db);
4036         rbtdbiter->common.relative_names = relative_names;
4037         rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
4038         rbtdbiter->common.cleaning = ISC_FALSE;
4039         rbtdbiter->paused = ISC_TRUE;
4040         rbtdbiter->tree_locked = isc_rwlocktype_none;
4041         rbtdbiter->result = ISC_R_SUCCESS;
4042         dns_fixedname_init(&rbtdbiter->name);
4043         dns_fixedname_init(&rbtdbiter->origin);
4044         rbtdbiter->node = NULL;
4045         rbtdbiter->delete = 0;
4046         memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
4047         dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
4048
4049         *iteratorp = (dns_dbiterator_t *)rbtdbiter;
4050
4051         return (ISC_R_SUCCESS);
4052 }
4053
4054 static isc_result_t
4055 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
4056                   dns_rdatatype_t type, dns_rdatatype_t covers,
4057                   isc_stdtime_t now, dns_rdataset_t *rdataset,
4058                   dns_rdataset_t *sigrdataset)
4059 {
4060         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4061         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
4062         rdatasetheader_t *header, *header_next, *found, *foundsig;
4063         rbtdb_serial_t serial;
4064         rbtdb_version_t *rbtversion = version;
4065         isc_boolean_t close_version = ISC_FALSE;
4066         rbtdb_rdatatype_t matchtype, sigmatchtype;
4067
4068         REQUIRE(VALID_RBTDB(rbtdb));
4069         REQUIRE(type != dns_rdatatype_any);
4070
4071         if (rbtversion == NULL) {
4072                 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
4073                 close_version = ISC_TRUE;
4074         }
4075         serial = rbtversion->serial;
4076         now = 0;
4077
4078         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4079                   isc_rwlocktype_read);
4080
4081         found = NULL;
4082         foundsig = NULL;
4083         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
4084         if (covers == 0)
4085                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4086         else
4087                 sigmatchtype = 0;
4088
4089         for (header = rbtnode->data; header != NULL; header = header_next) {
4090                 header_next = header->next;
4091                 do {
4092                         if (header->serial <= serial &&
4093                             !IGNORE(header)) {
4094                                 /*
4095                                  * Is this a "this rdataset doesn't
4096                                  * exist" record?
4097                                  */
4098                                 if (NONEXISTENT(header))
4099                                         header = NULL;
4100                                 break;
4101                         } else
4102                                 header = header->down;
4103                 } while (header != NULL);
4104                 if (header != NULL) {
4105                         /*
4106                          * We have an active, extant rdataset.  If it's a
4107                          * type we're looking for, remember it.
4108                          */
4109                         if (header->type == matchtype) {
4110                                 found = header;
4111                                 if (foundsig != NULL)
4112                                         break;
4113                         } else if (header->type == sigmatchtype) {
4114                                 foundsig = header;
4115                                 if (found != NULL)
4116                                         break;
4117                         }
4118                 }
4119         }
4120         if (found != NULL) {
4121                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
4122                 if (foundsig != NULL)
4123                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
4124                                       sigrdataset);
4125         }
4126
4127         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4128                     isc_rwlocktype_read);
4129
4130         if (close_version)
4131                 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
4132                              ISC_FALSE);
4133
4134         if (found == NULL)
4135                 return (ISC_R_NOTFOUND);
4136
4137         return (ISC_R_SUCCESS);
4138 }
4139
4140 static isc_result_t
4141 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
4142                    dns_rdatatype_t type, dns_rdatatype_t covers,
4143                    isc_stdtime_t now, dns_rdataset_t *rdataset,
4144                    dns_rdataset_t *sigrdataset)
4145 {
4146         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4147         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
4148         rdatasetheader_t *header, *header_next, *found, *foundsig;
4149         rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
4150         isc_result_t result;
4151         nodelock_t *lock;
4152         isc_rwlocktype_t locktype;
4153
4154         REQUIRE(VALID_RBTDB(rbtdb));
4155         REQUIRE(type != dns_rdatatype_any);
4156
4157         UNUSED(version);
4158
4159         result = ISC_R_SUCCESS;
4160
4161         if (now == 0)
4162                 isc_stdtime_get(&now);
4163
4164         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
4165         locktype = isc_rwlocktype_read;
4166         NODE_LOCK(lock, locktype);
4167
4168         found = NULL;
4169         foundsig = NULL;
4170         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
4171         negtype = RBTDB_RDATATYPE_VALUE(0, type);
4172         if (covers == 0)
4173                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4174         else
4175                 sigmatchtype = 0;
4176
4177         for (header = rbtnode->data; header != NULL; header = header_next) {
4178                 header_next = header->next;
4179                 if (header->ttl <= now) {
4180                         if ((header->ttl <= now - RBTDB_VIRTUAL) &&
4181                             (locktype == isc_rwlocktype_write ||
4182                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4183                                 /*
4184                                  * We update the node's status only when we
4185                                  * can get write access.
4186                                  */
4187                                 locktype = isc_rwlocktype_write;
4188                                 
4189                                 /*
4190                                  * We don't check if refcurrent(rbtnode) == 0
4191                                  * and try to free like we do in cache_find(),
4192                                  * because refcurrent(rbtnode) must be
4193                                  * non-zero.  This is so because 'node' is an
4194                                  * argument to the function.
4195                                  */
4196                                 header->attributes |= RDATASET_ATTR_STALE;
4197                                 rbtnode->dirty = 1;
4198                         }
4199                 } else if (EXISTS(header)) {
4200                         if (header->type == matchtype)
4201                                 found = header;
4202                         else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4203                                  header->type == negtype)
4204                                 found = header;
4205                         else if (header->type == sigmatchtype)
4206                                 foundsig = header;
4207                 }
4208         }
4209         if (found != NULL) {
4210                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
4211                 if (foundsig != NULL)
4212                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
4213                                       sigrdataset);
4214         }
4215
4216         NODE_UNLOCK(lock, locktype);
4217
4218         if (found == NULL)
4219                 return (ISC_R_NOTFOUND);
4220
4221         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
4222                 /*
4223                  * We found a negative cache entry.
4224                  */
4225                 if (NXDOMAIN(found))
4226                         result = DNS_R_NCACHENXDOMAIN;
4227                 else
4228                         result = DNS_R_NCACHENXRRSET;
4229         }
4230
4231         return (result);
4232 }
4233
4234 static isc_result_t
4235 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
4236              isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
4237 {
4238         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4239         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
4240         rbtdb_version_t *rbtversion = version;
4241         rbtdb_rdatasetiter_t *iterator;
4242         unsigned int refs;
4243
4244         REQUIRE(VALID_RBTDB(rbtdb));
4245
4246         iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
4247         if (iterator == NULL)
4248                 return (ISC_R_NOMEMORY);
4249
4250         if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
4251                 now = 0;
4252                 if (rbtversion == NULL)
4253                         currentversion(db,
4254                                  (dns_dbversion_t **) (void *)(&rbtversion));
4255                 else {
4256                         unsigned int refs;
4257
4258                         isc_refcount_increment(&rbtversion->references,
4259                                                &refs);
4260                         INSIST(refs > 1);
4261                 }
4262         } else {
4263                 if (now == 0)
4264                         isc_stdtime_get(&now);
4265                 rbtversion = NULL;
4266         }
4267
4268         iterator->common.magic = DNS_RDATASETITER_MAGIC;
4269         iterator->common.methods = &rdatasetiter_methods;
4270         iterator->common.db = db;
4271         iterator->common.node = node;
4272         iterator->common.version = (dns_dbversion_t *)rbtversion;
4273         iterator->common.now = now;
4274
4275         NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4276
4277         dns_rbtnode_refincrement(rbtnode, &refs);
4278         INSIST(refs != 0);
4279
4280         iterator->current = NULL;
4281
4282         NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4283
4284         *iteratorp = (dns_rdatasetiter_t *)iterator;
4285
4286         return (ISC_R_SUCCESS);
4287 }
4288
4289 static isc_boolean_t
4290 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
4291         rdatasetheader_t *header, *header_next;
4292         isc_boolean_t cname, other_data;
4293         dns_rdatatype_t rdtype;
4294
4295         /*
4296          * The caller must hold the node lock.
4297          */
4298
4299         /*
4300          * Look for CNAME and "other data" rdatasets active in our version.
4301          */
4302         cname = ISC_FALSE;
4303         other_data = ISC_FALSE;
4304         for (header = node->data; header != NULL; header = header_next) {
4305                 header_next = header->next;
4306                 if (header->type == dns_rdatatype_cname) {
4307                         /*
4308                          * Look for an active extant CNAME.
4309                          */
4310                         do {
4311                                 if (header->serial <= serial &&
4312                                     !IGNORE(header)) {
4313                                         /*
4314                                          * Is this a "this rdataset doesn't
4315                                          * exist" record?
4316                                          */
4317                                         if (NONEXISTENT(header))
4318                                                 header = NULL;
4319                                         break;
4320                                 } else
4321                                         header = header->down;
4322                         } while (header != NULL);
4323                         if (header != NULL)
4324                                 cname = ISC_TRUE;
4325                 } else {
4326                         /*
4327                          * Look for active extant "other data".
4328                          *
4329                          * "Other data" is any rdataset whose type is not
4330                          * KEY, RRSIG KEY, NSEC, RRSIG NSEC or RRSIG CNAME.
4331                          */
4332                         rdtype = RBTDB_RDATATYPE_BASE(header->type);
4333                         if (rdtype == dns_rdatatype_rrsig ||
4334                             rdtype == dns_rdatatype_sig)
4335                                 rdtype = RBTDB_RDATATYPE_EXT(header->type);
4336                         if (rdtype != dns_rdatatype_nsec &&
4337                             rdtype != dns_rdatatype_key &&
4338                             rdtype != dns_rdatatype_cname) {
4339                                 /*
4340                                  * We've found a type that isn't
4341                                  * NSEC, KEY, CNAME, or one of their
4342                                  * signatures.  Is it active and extant?
4343                                  */
4344                                 do {
4345                                         if (header->serial <= serial &&
4346                                             !IGNORE(header)) {
4347                                                 /*
4348                                                  * Is this a "this rdataset
4349                                                  * doesn't exist" record?
4350                                                  */
4351                                                 if (NONEXISTENT(header))
4352                                                         header = NULL;
4353                                                 break;
4354                                         } else
4355                                                 header = header->down;
4356                                 } while (header != NULL);
4357                                 if (header != NULL)
4358                                         other_data = ISC_TRUE;
4359                         }
4360                 }
4361         }
4362
4363         if (cname && other_data)
4364                 return (ISC_TRUE);
4365
4366         return (ISC_FALSE);
4367 }
4368
/*
 * add() -- link 'newheader' into the rdataset list of 'rbtnode'.
 *
 * Arguments:
 *      rbtversion      version being modified; NULL takes the code paths
 *                      that the comments below describe as the cache case.
 *      newheader       the rdataset to add.  It is always consumed: it is
 *                      either linked into the node or released with
 *                      free_rdataset() before returning.
 *      options         DNS_DBADD_MERGE (requires a non-NULL 'rbtversion'),
 *                      DNS_DBADD_FORCE, DNS_DBADD_EXACT and
 *                      DNS_DBADD_EXACTTTL are examined.
 *      loading         when true no 'changed' record is created and a
 *                      replaced header is freed immediately.
 *      addedrdataset   if non-NULL, bound to the header that ends up
 *                      representing the data (the new one, or the existing
 *                      one when the new data is rejected or is identical).
 *      now             compared against header ttl values to decide
 *                      whether existing data is still current.
 *
 * Returns:
 *      ISC_R_SUCCESS           new header linked in, or an identical
 *                              NS/A/AAAA rdataset was kept
 *      DNS_R_UNCHANGED         nothing was changed
 *      ISC_R_NOMEMORY          add_changed() failed
 *      DNS_R_CNAMEANDOTHER     header was linked in, but the node now has
 *                              CNAME and other data; 'addedrdataset' is
 *                              not bound in this case
 *      DNS_R_NOTEXACT or any failure result of dns_rdataslab_merge()
 *                              when merging fails
 */
4369 static isc_result_t
4370 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
4371     rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
4372     dns_rdataset_t *addedrdataset, isc_stdtime_t now)
4373 {
4374         rbtdb_changed_t *changed = NULL;
4375         rdatasetheader_t *topheader, *topheader_prev, *header;
4376         unsigned char *merged;
4377         isc_result_t result;
4378         isc_boolean_t header_nx;
4379         isc_boolean_t newheader_nx;
4380         isc_boolean_t merge;
4381         dns_rdatatype_t rdtype, covers;
4382         rbtdb_rdatatype_t negtype;
4383         dns_trust_t trust;
4384
4385         /*
4386          * Add an rdatasetheader_t to a node.
4387          */
4388
4389         /*
4390          * Caller must be holding the node lock.
4391          */
4392
4393         if ((options & DNS_DBADD_MERGE) != 0) {
4394                 REQUIRE(rbtversion != NULL);
4395                 merge = ISC_TRUE;
4396         } else
4397                 merge = ISC_FALSE;
4398
             /*
              * 'trust' is only the value used in the "is the new data less
              * trusted?" comparisons below; newheader->trust itself is not
              * modified by DNS_DBADD_FORCE.
              */
4399         if ((options & DNS_DBADD_FORCE) != 0)
4400                 trust = dns_trust_ultimate;
4401         else
4402                 trust = newheader->trust;
4403
4404         if (rbtversion != NULL && !loading) {
4405                 /*
4406                  * We always add a changed record, even if no changes end up
4407                  * being made to this node, because it's harmless and
4408                  * simplifies the code.
4409                  */
4410                 changed = add_changed(rbtdb, rbtversion, rbtnode);
4411                 if (changed == NULL) {
4412                         free_rdataset(rbtdb->common.mctx, newheader);
4413                         return (ISC_R_NOMEMORY);
4414                 }
4415         }
4416
4417         newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
4418         topheader_prev = NULL;
4419
             /*
              * 'negtype' is the complementary type to look for in addition
              * to newheader->type: for a negative cache entry it is the
              * covered (positive) type, for positive data it is the negative
              * cache entry for that type.  It stays 0 when no version-less
              * (cache) processing is done.
              */
4420         negtype = 0;
4421         if (rbtversion == NULL && !newheader_nx) {
4422                 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
4423                 if (rdtype == 0) {
4424                         /*
4425                          * We're adding a negative cache entry.
4426                          */
4427                         covers = RBTDB_RDATATYPE_EXT(newheader->type);
4428                         if (covers == dns_rdatatype_any) {
4429                                 /*
4430                                  * We're adding a negative cache entry
4431                                  * which covers all types (NXDOMAIN,
4432                                  * NODATA(QTYPE=ANY)).
4433                                  *
4434                                  * We make all other data stale so that the
4435                                  * only rdataset that can be found at this
4436                                  * node is the negative cache entry.
4437                                  */
4438                                 for (topheader = rbtnode->data;
4439                                      topheader != NULL;
4440                                      topheader = topheader->next) {
4441                                         topheader->ttl = 0;
4442                                         topheader->attributes |=
4443                                                 RDATASET_ATTR_STALE;
4444                                 }
4445                                 rbtnode->dirty = 1;
                                     /*
                                      * The loop above ran to completion, so
                                      * topheader is NULL here and
                                      * topheader_prev is still NULL: the new
                                      * entry is inserted at the head of the
                                      * node's list.
                                      */
4446                                 goto find_header;
4447                         }
4448                         negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
4449                 } else {
4450                         /*
4451                          * We're adding something that isn't a
4452                          * negative cache entry.  Look for an extant
4453                          * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
4454                          * cache entry.
4455                          */
4456                         for (topheader = rbtnode->data;
4457                              topheader != NULL;
4458                              topheader = topheader->next) {
4459                                 if (topheader->type == 
4460                                     RBTDB_RDATATYPE_NCACHEANY)
4461                                         break;
4462                         }
4463                         if (topheader != NULL && EXISTS(topheader) &&
4464                             topheader->ttl > now) {
4465                                 /*
4466                                  * Found one.
4467                                  */
4468                                 if (trust < topheader->trust) {
4469                                         /*
4470                                          * The NXDOMAIN/NODATA(QTYPE=ANY)
4471                                          * is more trusted.
4472                                          */
4473                                         
4474                                         free_rdataset(rbtdb->common.mctx,
4475                                                       newheader);
4476                                         if (addedrdataset != NULL)
4477                                                 bind_rdataset(rbtdb, rbtnode,
4478                                                               topheader, now,
4479                                                               addedrdataset);
4480                                         return (DNS_R_UNCHANGED);
4481                                 }
4482                                 /*
4483                                  * The new rdataset is better.  Expire the
4484                                  * NXDOMAIN/NODATA(QTYPE=ANY).
4485                                  */
4486                                 topheader->ttl = 0;
4487                                 topheader->attributes |= RDATASET_ATTR_STALE;
4488                                 rbtnode->dirty = 1;
                                     /*
                                      * Clear topheader so that the new data
                                      * is inserted at the head of the list
                                      * rather than on top of the expired
                                      * negative entry.
                                      */
4489                                 topheader = NULL;
4490                                 goto find_header;
4491                         }
4492                         negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
4493                 }
4494         }
4495
             /*
              * Find the top-level header of the same type (or of the
              * complementary type), remembering its predecessor so that it
              * can be unlinked from the list.
              */
4496         for (topheader = rbtnode->data;
4497              topheader != NULL;
4498              topheader = topheader->next) {
4499                 if (topheader->type == newheader->type ||
4500                     topheader->type == negtype)
4501                         break;
4502                 topheader_prev = topheader;
4503         }
4504
4505  find_header:
4506         /*
4507          * If header isn't NULL, we've found the right type.  There may be
4508          * IGNORE rdatasets between the top of the chain and the first real
4509          * data.  We skip over them.
4510          */
4511         header = topheader;
4512         while (header != NULL && IGNORE(header))
4513                 header = header->down;
4514         if (header != NULL) {
4515                 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
4516
4517                 /*
4518                  * Deleting an already non-existent rdataset has no effect.
4519                  */
4520                 if (header_nx && newheader_nx) {
4521                         free_rdataset(rbtdb->common.mctx, newheader);
4522                         return (DNS_R_UNCHANGED);
4523                 }
4524
4525                 /*
4526                  * Trying to add an rdataset with lower trust to a cache DB
4527                  * has no effect, provided that the cache data isn't stale.
4528                  */
4529                 if (rbtversion == NULL && trust < header->trust &&
4530                     (header->ttl > now || header_nx)) {
4531                         free_rdataset(rbtdb->common.mctx, newheader);
4532                         if (addedrdataset != NULL)
4533                                 bind_rdataset(rbtdb, rbtnode, header, now,
4534                                               addedrdataset);
4535                         return (DNS_R_UNCHANGED);
4536                 }
4537
4538                 /*
4539                  * Don't merge if a nonexistent rdataset is involved.
4540                  */
4541                 if (merge && (header_nx || newheader_nx))
4542                         merge = ISC_FALSE;
4543
4544                 /*
4545                  * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
4546                  * that is the union of 'newheader' and 'header'.
4547                  */
4548                 if (merge) {
4549                         unsigned int flags = 0;
4550                         INSIST(rbtversion->serial >= header->serial);
4551                         merged = NULL;
4552                         result = ISC_R_SUCCESS;
4553                         
                             /*
                              * A TTL mismatch is an error under
                              * DNS_DBADD_EXACTTTL; otherwise the merge is
                              * asked for with DNS_RDATASLAB_FORCE.
                              */
4554                         if ((options & DNS_DBADD_EXACT) != 0)
4555                                 flags |= DNS_RDATASLAB_EXACT;
4556                         if ((options & DNS_DBADD_EXACTTTL) != 0 &&
4557                              newheader->ttl != header->ttl)
4558                                         result = DNS_R_NOTEXACT;
4559                         else if (newheader->ttl != header->ttl)
4560                                 flags |= DNS_RDATASLAB_FORCE;
4561                         if (result == ISC_R_SUCCESS)
4562                                 result = dns_rdataslab_merge(
4563                                              (unsigned char *)header,
4564                                              (unsigned char *)newheader,
4565                                              (unsigned int)(sizeof(*newheader)),
4566                                              rbtdb->common.mctx,
4567                                              rbtdb->common.rdclass,
4568                                              (dns_rdatatype_t)header->type,
4569                                              flags, &merged);
4570                         if (result == ISC_R_SUCCESS) {
4571                                 /*
4572                                  * If 'header' has the same serial number as
4573                                  * we do, we could clean it up now if we knew
4574                                  * that our caller had no references to it.
4575                                  * We don't know this, however, so we leave it
4576                                  * alone.  It will get cleaned up when
4577                                  * clean_zone_node() runs.
4578                                  */
4579                                 free_rdataset(rbtdb->common.mctx, newheader);
4580                                 newheader = (rdatasetheader_t *)merged;
4581                         } else {
4582                                 free_rdataset(rbtdb->common.mctx, newheader);
4583                                 return (result);
4584                         }
4585                 }
4586                 /*
4587                  * Don't replace existing NS, A and AAAA RRsets
4588                  * in the cache if they already exist.  This
4589                  * prevents named being locked to old servers.
4590                  * Don't lower trust of existing record if the
4591                  * update is forced.
4592                  */
4593                 if (IS_CACHE(rbtdb) && header->ttl > now &&
4594                     header->type == dns_rdatatype_ns &&
4595                     !header_nx && !newheader_nx &&
4596                     header->trust >= newheader->trust &&
4597                     dns_rdataslab_equalx((unsigned char *)header,
4598                                          (unsigned char *)newheader,
4599                                          (unsigned int)(sizeof(*newheader)),
4600                                          rbtdb->common.rdclass,
4601                                          (dns_rdatatype_t)header->type)) {
4602                         /*
4603                          * Honour the new ttl if it is less than the
4604                          * older one.
4605                          */
4606                         if (header->ttl > newheader->ttl)
4607                                 header->ttl = newheader->ttl;
                             /*
                              * Move the noqname data over to the header
                              * being kept so it is not released along with
                              * newheader.
                              */
4608                         if (header->noqname == NULL &&
4609                             newheader->noqname != NULL) {
4610                                 header->noqname = newheader->noqname;
4611                                 newheader->noqname = NULL;
4612                         }
4613                         free_rdataset(rbtdb->common.mctx, newheader);
4614                         if (addedrdataset != NULL)
4615                                 bind_rdataset(rbtdb, rbtnode, header, now,
4616                                               addedrdataset);
4617                         return (ISC_R_SUCCESS);
4618                 }
                     /*
                      * Same treatment for A and AAAA, compared with
                      * dns_rdataslab_equal() instead of
                      * dns_rdataslab_equalx().
                      */
4619                 if (IS_CACHE(rbtdb) && header->ttl > now &&
4620                     (header->type == dns_rdatatype_a ||
4621                      header->type == dns_rdatatype_aaaa) &&
4622                     !header_nx && !newheader_nx &&
4623                     header->trust >= newheader->trust &&
4624                     dns_rdataslab_equal((unsigned char *)header,
4625                                         (unsigned char *)newheader,
4626                                         (unsigned int)(sizeof(*newheader)))) {
4627                         /*
4628                          * Honour the new ttl if it is less than the
4629                          * older one.
4630                          */
4631                         if (header->ttl > newheader->ttl)
4632                                 header->ttl = newheader->ttl;
4633                         if (header->noqname == NULL &&
4634                             newheader->noqname != NULL) {
4635                                 header->noqname = newheader->noqname;
4636                                 newheader->noqname = NULL;
4637                         }
4638                         free_rdataset(rbtdb->common.mctx, newheader);
4639                         if (addedrdataset != NULL)
4640                                 bind_rdataset(rbtdb, rbtnode, header, now,
4641                                               addedrdataset);
4642                         return (ISC_R_SUCCESS);
4643                 }
4644                 INSIST(rbtversion == NULL ||
4645                        rbtversion->serial >= topheader->serial);
                     /*
                      * Put newheader into the top-level list in the position
                      * currently occupied by topheader.
                      */
4646                 if (topheader_prev != NULL)
4647                         topheader_prev->next = newheader;
4648                 else
4649                         rbtnode->data = newheader;
4650                 newheader->next = topheader->next;
4651                 if (loading) {
4652                         /*
4653                          * There are no other references to 'header' when
4654                          * loading, so we MAY clean up 'header' now.
4655                          * Since we don't generate changed records when
4656                          * loading, we MUST clean up 'header' now.
4657                          */
4658                         newheader->down = NULL;
4659                         free_rdataset(rbtdb->common.mctx, header);
4660                 } else {
                             /*
                              * Keep the old chain reachable beneath the new
                              * header; the displaced top header's 'next' is
                              * pointed at newheader.
                              */
4661                         newheader->down = topheader;
4662                         topheader->next = newheader;
4663                         rbtnode->dirty = 1;
4664                         if (changed != NULL)
4665                                 changed->dirty = ISC_TRUE;
                             /*
                              * Without a version the superseded header is
                              * expired in place.
                              */
4666                         if (rbtversion == NULL) {
4667                                 header->ttl = 0;
4668                                 header->attributes |= RDATASET_ATTR_STALE;
4669                         }
4670                 }
4671         } else {
4672                 /*
4673                  * No non-IGNORED rdatasets of the given type exist at
4674                  * this node.
4675                  */
4676
4677                 /*
4678                  * If we're trying to delete the type, don't bother.
4679                  */
4680                 if (newheader_nx) {
4681                         free_rdataset(rbtdb->common.mctx, newheader);
4682                         return (DNS_R_UNCHANGED);
4683                 }
4684
4685                 if (topheader != NULL) {
4686                         /*
4687                          * We have a list of rdatasets of the given type,
4688                          * but they're all marked IGNORE.  We simply insert
4689                          * the new rdataset at the head of the list.
4690                          *
4691                          * Ignored rdatasets cannot occur during loading, so
4692                          * we INSIST on it.
4693                          */
4694                         INSIST(!loading);
4695                         INSIST(rbtversion == NULL ||
4696                                rbtversion->serial >= topheader->serial);
4697                         if (topheader_prev != NULL)
4698                                 topheader_prev->next = newheader;
4699                         else
4700                                 rbtnode->data = newheader;
4701                         newheader->next = topheader->next;
4702                         newheader->down = topheader;
4703                         topheader->next = newheader;
4704                         rbtnode->dirty = 1;
4705                         if (changed != NULL)
4706                                 changed->dirty = ISC_TRUE;
4707                 } else {
4708                         /*
4709                          * No rdatasets of the given type exist at the node.
4710                          */
4711                         newheader->next = rbtnode->data;
4712                         newheader->down = NULL;
4713                         rbtnode->data = newheader;
4714                 }
4715         }
4716
4717         /*
4718          * Check if the node now contains CNAME and other data.
4719          */
4720         if (rbtversion != NULL &&
4721             cname_and_other_data(rbtnode, rbtversion->serial))
4722                 return (DNS_R_CNAMEANDOTHER);
4723
4724         if (addedrdataset != NULL)
4725                 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
4726
4727         return (ISC_R_SUCCESS);
4728 }
4729
4730 static inline isc_boolean_t
4731 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
4732                 rbtdb_rdatatype_t type)
4733 {
4734         if (IS_CACHE(rbtdb)) {
4735                 if (type == dns_rdatatype_dname)
4736                         return (ISC_TRUE);
4737                 else
4738                         return (ISC_FALSE);
4739         } else if (type == dns_rdatatype_dname ||
4740                    (type == dns_rdatatype_ns &&
4741                     (node != rbtdb->origin_node || IS_STUB(rbtdb))))
4742                 return (ISC_TRUE);
4743         return (ISC_FALSE);
4744 }
4745
4746 static inline isc_result_t
4747 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
4748            dns_rdataset_t *rdataset)
4749 {
4750         struct noqname *noqname;
4751         isc_mem_t *mctx = rbtdb->common.mctx;
4752         dns_name_t name;
4753         dns_rdataset_t nsec, nsecsig;
4754         isc_result_t result;
4755         isc_region_t r;
4756
4757         dns_name_init(&name, NULL);
4758         dns_rdataset_init(&nsec);
4759         dns_rdataset_init(&nsecsig);
4760
4761         result = dns_rdataset_getnoqname(rdataset, &name, &nsec, &nsecsig);
4762         RUNTIME_CHECK(result == ISC_R_SUCCESS);
4763
4764         noqname = isc_mem_get(mctx, sizeof(*noqname));
4765         if (noqname == NULL) {
4766                 result = ISC_R_NOMEMORY;
4767                 goto cleanup;
4768         }
4769         dns_name_init(&noqname->name, NULL);
4770         noqname->nsec = NULL;
4771         noqname->nsecsig = NULL;
4772         result = dns_name_dup(&name, mctx, &noqname->name);
4773         if (result != ISC_R_SUCCESS)
4774                 goto cleanup;
4775         result = dns_rdataslab_fromrdataset(&nsec, mctx, &r, 0);
4776         if (result != ISC_R_SUCCESS)
4777                 goto cleanup;
4778         noqname->nsec = r.base;
4779         result = dns_rdataslab_fromrdataset(&nsecsig, mctx, &r, 0);
4780         if (result != ISC_R_SUCCESS)
4781                 goto cleanup;
4782         noqname->nsecsig = r.base;
4783         dns_rdataset_disassociate(&nsec);
4784         dns_rdataset_disassociate(&nsecsig);
4785         newheader->noqname = noqname;
4786         return (ISC_R_SUCCESS);
4787
4788 cleanup:
4789         dns_rdataset_disassociate(&nsec);
4790         dns_rdataset_disassociate(&nsecsig);
4791         free_noqname(mctx, &noqname);
4792         return(result);
4793 }
4794
4795 static isc_result_t
4796 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
4797             isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
4798             dns_rdataset_t *addedrdataset)
4799 {
4800         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4801         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
4802         rbtdb_version_t *rbtversion = version;
4803         isc_region_t region;
4804         rdatasetheader_t *newheader;
4805         isc_result_t result;
4806         isc_boolean_t delegating;
4807
4808         REQUIRE(VALID_RBTDB(rbtdb));
4809
4810         if (rbtversion == NULL) {
4811                 if (now == 0)
4812                         isc_stdtime_get(&now);
4813         } else
4814                 now = 0;
4815
4816         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
4817                                             &region,
4818                                             sizeof(rdatasetheader_t));
4819         if (result != ISC_R_SUCCESS)
4820                 return (result);
4821
4822         newheader = (rdatasetheader_t *)region.base;
4823         newheader->ttl = rdataset->ttl + now;
4824         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
4825                                                 rdataset->covers);
4826         newheader->attributes = 0;
4827         newheader->noqname = NULL;
4828         newheader->count = 0;
4829         newheader->trust = rdataset->trust;
4830         newheader->additional_auth = NULL;
4831         newheader->additional_glue = NULL;
4832         if (rbtversion != NULL) {
4833                 newheader->serial = rbtversion->serial;
4834                 now = 0;
4835         } else {
4836                 newheader->serial = 1;
4837                 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
4838                         newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
4839                 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
4840                         result = addnoqname(rbtdb, newheader, rdataset);
4841                         if (result != ISC_R_SUCCESS) {
4842                                 free_rdataset(rbtdb->common.mctx, newheader);
4843                                 return (result);
4844                         }
4845                 }
4846         }
4847
4848         /*
4849          * If we're adding a delegation type (e.g. NS or DNAME for a zone,
4850          * just DNAME for the cache), then we need to set the callback bit
4851          * on the node, and to do that we must be holding an exclusive lock
4852          * on the tree.
4853          */
4854         if (delegating_type(rbtdb, rbtnode, rdataset->type)) {
4855                 delegating = ISC_TRUE;
4856                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
4857         } else
4858                 delegating = ISC_FALSE;
4859
4860         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4861                   isc_rwlocktype_write);
4862
4863         result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
4864                      addedrdataset, now);
4865         if (result == ISC_R_SUCCESS && delegating)
4866                 rbtnode->find_callback = 1;
4867
4868         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4869                     isc_rwlocktype_write);
4870
4871         if (delegating)
4872                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
4873
4874         /*
4875          * Update the zone's secure status.  If version is non-NULL
4876          * this is defered until closeversion() is called.
4877          */
4878         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
4879                 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
4880
4881         return (result);
4882 }
4883
4884 static isc_result_t
4885 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
4886                  dns_rdataset_t *rdataset, unsigned int options,
4887                  dns_rdataset_t *newrdataset)
4888 {
4889         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4890         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
4891         rbtdb_version_t *rbtversion = version;
4892         rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
4893         unsigned char *subresult;
4894         isc_region_t region;
4895         isc_result_t result;
4896         rbtdb_changed_t *changed;
4897
4898         REQUIRE(VALID_RBTDB(rbtdb));
4899
4900         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
4901                                             &region,
4902                                             sizeof(rdatasetheader_t));
4903         if (result != ISC_R_SUCCESS)
4904                 return (result);
4905         newheader = (rdatasetheader_t *)region.base;
4906         newheader->ttl = rdataset->ttl;
4907         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
4908                                                 rdataset->covers);
4909         newheader->attributes = 0;
4910         newheader->serial = rbtversion->serial;
4911         newheader->trust = 0;
4912         newheader->noqname = NULL;
4913         newheader->count = 0;
4914         newheader->additional_auth = NULL;
4915         newheader->additional_glue = NULL;
4916
4917         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4918                   isc_rwlocktype_write);
4919
4920         changed = add_changed(rbtdb, rbtversion, rbtnode);
4921         if (changed == NULL) {
4922                 free_rdataset(rbtdb->common.mctx, newheader);
4923                 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4924                             isc_rwlocktype_write);
4925                 return (ISC_R_NOMEMORY);
4926         }
4927
4928         topheader_prev = NULL;
4929         for (topheader = rbtnode->data;
4930              topheader != NULL;
4931              topheader = topheader->next) {
4932                 if (topheader->type == newheader->type)
4933                         break;
4934                 topheader_prev = topheader;
4935         }
4936         /*
4937          * If header isn't NULL, we've found the right type.  There may be
4938          * IGNORE rdatasets between the top of the chain and the first real
4939          * data.  We skip over them.
4940          */
4941         header = topheader;
4942         while (header != NULL && IGNORE(header))
4943                 header = header->down;
4944         if (header != NULL && EXISTS(header)) {
4945                 unsigned int flags = 0;
4946                 subresult = NULL;
4947                 result = ISC_R_SUCCESS;
4948                 if ((options & DNS_DBSUB_EXACT) != 0) {
4949                         flags |= DNS_RDATASLAB_EXACT;
4950                         if (newheader->ttl != header->ttl)
4951                                 result = DNS_R_NOTEXACT;
4952                 }
4953                 if (result == ISC_R_SUCCESS)
4954                         result = dns_rdataslab_subtract(
4955                                         (unsigned char *)header,
4956                                         (unsigned char *)newheader,
4957                                         (unsigned int)(sizeof(*newheader)),
4958                                         rbtdb->common.mctx,
4959                                         rbtdb->common.rdclass,
4960                                         (dns_rdatatype_t)header->type,
4961                                         flags, &subresult);
4962                 if (result == ISC_R_SUCCESS) {
4963                         free_rdataset(rbtdb->common.mctx, newheader);
4964                         newheader = (rdatasetheader_t *)subresult;
4965                         /*
4966                          * We have to set the serial since the rdataslab
4967                          * subtraction routine copies the reserved portion of
4968                          * header, not newheader.
4969                          */
4970                         newheader->serial = rbtversion->serial;
4971                         /*
4972                          * XXXJT: dns_rdataslab_subtract() copied the pointers
4973                          * to additional info.  We need to clear these fields
4974                          * to avoid having duplicated references.
4975                          */
4976                         newheader->additional_auth = NULL;
4977                         newheader->additional_glue = NULL;
4978                 } else if (result == DNS_R_NXRRSET) {
4979                         /*
4980                          * This subtraction would remove all of the rdata;
4981                          * add a nonexistent header instead.
4982                          */
4983                         free_rdataset(rbtdb->common.mctx, newheader);
4984                         newheader = isc_mem_get(rbtdb->common.mctx,
4985                                                 sizeof(*newheader));
4986                         if (newheader == NULL) {
4987                                 result = ISC_R_NOMEMORY;
4988                                 goto unlock;
4989                         }
4990                         newheader->ttl = 0;
4991                         newheader->type = topheader->type;
4992                         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
4993                         newheader->trust = 0;
4994                         newheader->serial = rbtversion->serial;
4995                         newheader->noqname = NULL;
4996                         newheader->count = 0;
4997                         newheader->additional_auth = NULL;
4998                         newheader->additional_glue = NULL;
4999                 } else {
5000                         free_rdataset(rbtdb->common.mctx, newheader);
5001                         goto unlock;
5002                 }
5003
5004                 /*
5005                  * If we're here, we want to link newheader in front of
5006                  * topheader.
5007                  */
5008                 INSIST(rbtversion->serial >= topheader->serial);
5009                 if (topheader_prev != NULL)
5010                         topheader_prev->next = newheader;
5011                 else
5012                         rbtnode->data = newheader;
5013                 newheader->next = topheader->next;
5014                 newheader->down = topheader;
5015                 topheader->next = newheader;
5016                 rbtnode->dirty = 1;
5017                 changed->dirty = ISC_TRUE;
5018         } else {
5019                 /*
5020                  * The rdataset doesn't exist, so we don't need to do anything
5021                  * to satisfy the deletion request.
5022                  */
5023                 free_rdataset(rbtdb->common.mctx, newheader);
5024                 if ((options & DNS_DBSUB_EXACT) != 0)
5025                         result = DNS_R_NOTEXACT;
5026                 else
5027                         result = DNS_R_UNCHANGED;                       
5028         }
5029
5030         if (result == ISC_R_SUCCESS && newrdataset != NULL)
5031                 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
5032
5033  unlock:
5034         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5035                     isc_rwlocktype_write);
5036
5037         /*
5038          * Update the zone's secure status.  If version is non-NULL
5039          * this is defered until closeversion() is called.
5040          */
5041         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
5042                 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
5043
5044         return (result);
5045 }
5046
5047 static isc_result_t
5048 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5049                dns_rdatatype_t type, dns_rdatatype_t covers)
5050 {
5051         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5052         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5053         rbtdb_version_t *rbtversion = version;
5054         isc_result_t result;
5055         rdatasetheader_t *newheader;
5056
5057         REQUIRE(VALID_RBTDB(rbtdb));
5058
5059         if (type == dns_rdatatype_any)
5060                 return (ISC_R_NOTIMPLEMENTED);
5061         if (type == dns_rdatatype_rrsig && covers == 0)
5062                 return (ISC_R_NOTIMPLEMENTED);
5063
5064         newheader = isc_mem_get(rbtdb->common.mctx, sizeof(*newheader));
5065         if (newheader == NULL)
5066                 return (ISC_R_NOMEMORY);
5067         newheader->ttl = 0;
5068         newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
5069         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
5070         newheader->trust = 0;
5071         newheader->noqname = NULL;
5072         newheader->additional_auth = NULL;
5073         newheader->additional_glue = NULL;
5074         if (rbtversion != NULL)
5075                 newheader->serial = rbtversion->serial;
5076         else
5077                 newheader->serial = 0;
5078         newheader->count = 0;
5079
5080         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5081                   isc_rwlocktype_write);
5082
5083         result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
5084                      ISC_FALSE, NULL, 0);
5085
5086         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5087                     isc_rwlocktype_write);
5088
5089         /*
5090          * Update the zone's secure status.  If version is non-NULL
5091          * this is defered until closeversion() is called.
5092          */
5093         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
5094                 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
5095
5096         return (result);
5097 }
5098
5099 static isc_result_t
5100 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
5101         rbtdb_load_t *loadctx = arg;
5102         dns_rbtdb_t *rbtdb = loadctx->rbtdb;
5103         dns_rbtnode_t *node;
5104         isc_result_t result;
5105         isc_region_t region;
5106         rdatasetheader_t *newheader;
5107
5108         /*
5109          * This routine does no node locking.  See comments in
5110          * 'load' below for more information on loading and
5111          * locking.
5112          */
5113
5114
5115         /*
5116          * SOA records are only allowed at top of zone.
5117          */
5118         if (rdataset->type == dns_rdatatype_soa &&
5119             !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
5120                 return (DNS_R_NOTZONETOP);
5121
5122         add_empty_wildcards(rbtdb, name);
5123
5124         if (dns_name_iswildcard(name)) {
5125                 /*
5126                  * NS record owners cannot legally be wild cards.
5127                  */
5128                 if (rdataset->type == dns_rdatatype_ns)
5129                         return (DNS_R_INVALIDNS);
5130                 result = add_wildcard_magic(rbtdb, name);
5131                 if (result != ISC_R_SUCCESS)
5132                         return (result);
5133         }
5134
5135         node = NULL;
5136         result = dns_rbt_addnode(rbtdb->tree, name, &node);
5137         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
5138                 return (result);
5139         if (result != ISC_R_EXISTS) {
5140                 dns_name_t foundname;
5141                 dns_name_init(&foundname, NULL);
5142                 dns_rbt_namefromnode(node, &foundname);
5143 #ifdef DNS_RBT_USEHASH
5144                 node->locknum = node->hashval % rbtdb->node_lock_count;
5145 #else
5146                 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
5147                         rbtdb->node_lock_count;
5148 #endif
5149         }
5150
5151         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
5152                                             &region,
5153                                             sizeof(rdatasetheader_t));
5154         if (result != ISC_R_SUCCESS)
5155                 return (result);
5156         newheader = (rdatasetheader_t *)region.base;
5157         newheader->ttl = rdataset->ttl + loadctx->now; /* XXX overflow check */
5158         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
5159                                                 rdataset->covers);
5160         newheader->attributes = 0;
5161         newheader->trust = rdataset->trust;
5162         newheader->serial = 1;
5163         newheader->noqname = NULL;
5164         newheader->count = 0;
5165         newheader->additional_auth = NULL;
5166         newheader->additional_glue = NULL;
5167
5168         result = add(rbtdb, node, rbtdb->current_version, newheader,
5169                      DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
5170         if (result == ISC_R_SUCCESS &&
5171             delegating_type(rbtdb, node, rdataset->type))
5172                 node->find_callback = 1;
5173         else if (result == DNS_R_UNCHANGED)
5174                 result = ISC_R_SUCCESS;
5175
5176         return (result);
5177 }
5178
5179 static isc_result_t
5180 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
5181         rbtdb_load_t *loadctx;
5182         dns_rbtdb_t *rbtdb;
5183
5184         rbtdb = (dns_rbtdb_t *)db;
5185
5186         REQUIRE(VALID_RBTDB(rbtdb));
5187
5188         loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
5189         if (loadctx == NULL)
5190                 return (ISC_R_NOMEMORY);
5191
5192         loadctx->rbtdb = rbtdb;
5193         if (IS_CACHE(rbtdb))
5194                 isc_stdtime_get(&loadctx->now);
5195         else
5196                 loadctx->now = 0;
5197
5198         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5199
5200         REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
5201                 == 0);
5202         rbtdb->attributes |= RBTDB_ATTR_LOADING;
5203
5204         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
5205
5206         *addp = loading_addrdataset;
5207         *dbloadp = loadctx;
5208
5209         return (ISC_R_SUCCESS);
5210 }
5211
5212 static isc_result_t
5213 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
5214         rbtdb_load_t *loadctx;
5215         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5216
5217         REQUIRE(VALID_RBTDB(rbtdb));
5218         REQUIRE(dbloadp != NULL);
5219         loadctx = *dbloadp;
5220         REQUIRE(loadctx->rbtdb == rbtdb);
5221
5222         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5223
5224         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
5225         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
5226
5227         rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
5228         rbtdb->attributes |= RBTDB_ATTR_LOADED;
5229
5230         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
5231
5232         /*
5233          * If there's a KEY rdataset at the zone origin containing a
5234          * zone key, we consider the zone secure.
5235          */
5236         if (! IS_CACHE(rbtdb))
5237                 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
5238
5239         *dbloadp = NULL;
5240
5241         isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
5242
5243         return (ISC_R_SUCCESS);
5244 }
5245
5246 static isc_result_t
5247 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
5248      dns_masterformat_t masterformat) {
5249         dns_rbtdb_t *rbtdb;
5250
5251         rbtdb = (dns_rbtdb_t *)db;
5252
5253         REQUIRE(VALID_RBTDB(rbtdb));
5254
5255         return (dns_master_dump2(rbtdb->common.mctx, db, version,
5256                                  &dns_master_style_default,
5257                                  filename, masterformat));
5258 }
5259
5260 static void
5261 delete_callback(void *data, void *arg) {
5262         dns_rbtdb_t *rbtdb = arg;
5263         rdatasetheader_t *current, *next;
5264
5265         for (current = data; current != NULL; current = next) {
5266                 next = current->next;
5267                 free_rdataset(rbtdb->common.mctx, current);
5268         }
5269 }
5270
5271 static isc_boolean_t
5272 issecure(dns_db_t *db) {
5273         dns_rbtdb_t *rbtdb;
5274         isc_boolean_t secure;
5275
5276         rbtdb = (dns_rbtdb_t *)db;
5277
5278         REQUIRE(VALID_RBTDB(rbtdb));
5279
5280         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5281         secure = rbtdb->secure;
5282         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5283
5284         return (secure);
5285 }
5286
5287 static unsigned int
5288 nodecount(dns_db_t *db) {
5289         dns_rbtdb_t *rbtdb;
5290         unsigned int count;
5291
5292         rbtdb = (dns_rbtdb_t *)db;
5293
5294         REQUIRE(VALID_RBTDB(rbtdb));
5295
5296         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5297         count = dns_rbt_nodecount(rbtdb->tree);
5298         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5299
5300         return (count);
5301 }
5302
5303 static void
5304 settask(dns_db_t *db, isc_task_t *task) {
5305         dns_rbtdb_t *rbtdb;
5306
5307         rbtdb = (dns_rbtdb_t *)db;
5308
5309         REQUIRE(VALID_RBTDB(rbtdb));
5310
5311         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5312         if (rbtdb->task != NULL)
5313                 isc_task_detach(&rbtdb->task);
5314         if (task != NULL)
5315                 isc_task_attach(task, &rbtdb->task);
5316         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
5317 }
5318
5319 static isc_boolean_t
5320 ispersistent(dns_db_t *db) {
5321         UNUSED(db);
5322         return (ISC_FALSE);
5323 }
5324
5325 static isc_result_t
5326 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
5327         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5328         dns_rbtnode_t *onode;
5329         isc_result_t result = ISC_R_SUCCESS;
5330
5331         REQUIRE(VALID_RBTDB(rbtdb));
5332         REQUIRE(nodep != NULL && *nodep == NULL);
5333
5334         /* Note that the access to origin_node doesn't require a DB lock */
5335         onode = (dns_rbtnode_t *)rbtdb->origin_node;
5336         if (onode != NULL) {
5337                 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
5338                 new_reference(rbtdb, onode);
5339                 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
5340
5341                 *nodep = rbtdb->origin_node;
5342         } else {
5343                 INSIST(!IS_CACHE(rbtdb));
5344                 result = ISC_R_NOTFOUND;
5345         }
5346
5347         return (result);
5348 }
5349
5350 static dns_dbmethods_t zone_methods = {
5351         attach,
5352         detach,
5353         beginload,
5354         endload,
5355         dump,
5356         currentversion,
5357         newversion,
5358         attachversion,
5359         closeversion,
5360         findnode,
5361         zone_find,
5362         zone_findzonecut,
5363         attachnode,
5364         detachnode,
5365         expirenode,
5366         printnode,
5367         createiterator,
5368         zone_findrdataset,
5369         allrdatasets,
5370         addrdataset,
5371         subtractrdataset,
5372         deleterdataset,
5373         issecure,
5374         nodecount,
5375         ispersistent,
5376         overmem,
5377         settask,
5378         getoriginnode
5379 };
5380
5381 static dns_dbmethods_t cache_methods = {
5382         attach,
5383         detach,
5384         beginload,
5385         endload,
5386         dump,
5387         currentversion,
5388         newversion,
5389         attachversion,
5390         closeversion,
5391         findnode,
5392         cache_find,
5393         cache_findzonecut,
5394         attachnode,
5395         detachnode,
5396         expirenode,
5397         printnode,
5398         createiterator,
5399         cache_findrdataset,
5400         allrdatasets,
5401         addrdataset,
5402         subtractrdataset,
5403         deleterdataset,
5404         issecure,
5405         nodecount,
5406         ispersistent,
5407         overmem,
5408         settask,
5409         getoriginnode
5410 };
5411
5412 isc_result_t
5413 #ifdef DNS_RBTDB_VERSION64
5414 dns_rbtdb64_create
5415 #else
5416 dns_rbtdb_create
5417 #endif
5418                 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
5419                  dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
5420                  void *driverarg, dns_db_t **dbp)
5421 {
5422         dns_rbtdb_t *rbtdb;
5423         isc_result_t result;
5424         int i;
5425         dns_name_t name;
5426
5427         /* Keep the compiler happy. */
5428         UNUSED(argc);
5429         UNUSED(argv);
5430         UNUSED(driverarg);
5431
5432         rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
5433         if (rbtdb == NULL)
5434                 return (ISC_R_NOMEMORY);
5435
5436         memset(rbtdb, '\0', sizeof(*rbtdb));
5437         dns_name_init(&rbtdb->common.origin, NULL);
5438         rbtdb->common.attributes = 0;
5439         if (type == dns_dbtype_cache) {
5440                 rbtdb->common.methods = &cache_methods;
5441                 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
5442         } else if (type == dns_dbtype_stub) {
5443                 rbtdb->common.methods = &zone_methods;
5444                 rbtdb->common.attributes |= DNS_DBATTR_STUB;
5445         } else
5446                 rbtdb->common.methods = &zone_methods;
5447         rbtdb->common.rdclass = rdclass;
5448         rbtdb->common.mctx = NULL;
5449
5450         result = RBTDB_INITLOCK(&rbtdb->lock);
5451         if (result != ISC_R_SUCCESS)
5452                 goto cleanup_rbtdb;
5453
5454         result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
5455         if (result != ISC_R_SUCCESS)
5456                 goto cleanup_lock;
5457
5458         if (rbtdb->node_lock_count == 0) {
5459                 if (IS_CACHE(rbtdb))
5460                         rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
5461                 else
5462                         rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
5463         }
5464         INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
5465         rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
5466                                         sizeof(rbtdb_nodelock_t));
5467         if (rbtdb->node_locks == NULL) {
5468                 result = ISC_R_NOMEMORY;
5469                 goto cleanup_tree_lock;
5470         }
5471
5472         rbtdb->active = rbtdb->node_lock_count;
5473
5474         for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
5475                 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
5476                 if (result == ISC_R_SUCCESS) {
5477                         result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
5478                         if (result != ISC_R_SUCCESS)
5479                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
5480                 }
5481                 if (result != ISC_R_SUCCESS) {
5482                         while (i-- > 0) {
5483                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
5484                                 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
5485                                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
5486                         }
5487                         goto cleanup_node_locks;
5488                 }
5489                 rbtdb->node_locks[i].exiting = ISC_FALSE;
5490         }
5491         
5492         /*
5493          * Attach to the mctx.  The database will persist so long as there
5494          * are references to it, and attaching to the mctx ensures that our
5495          * mctx won't disappear out from under us.
5496          */
5497         isc_mem_attach(mctx, &rbtdb->common.mctx);
5498
5499         /*
5500          * Must be initalized before free_rbtdb() is called.
5501          */
5502         isc_ondestroy_init(&rbtdb->common.ondest);
5503
5504         /*
5505          * Make a copy of the origin name.
5506          */
5507         result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
5508         if (result != ISC_R_SUCCESS) {
5509                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
5510                 return (result);
5511         }
5512
5513         /*
5514          * Make the Red-Black Tree.
5515          */
5516         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
5517         if (result != ISC_R_SUCCESS) {
5518                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
5519                 return (result);
5520         }
5521         /*
5522          * In order to set the node callback bit correctly in zone databases,
5523          * we need to know if the node has the origin name of the zone.
5524          * In loading_addrdataset() we could simply compare the new name
5525          * to the origin name, but this is expensive.  Also, we don't know the
5526          * node name in addrdataset(), so we need another way of knowing the
5527          * zone's top.
5528          *
5529          * We now explicitly create a node for the zone's origin, and then
5530          * we simply remember the node's address.  This is safe, because
5531          * the top-of-zone node can never be deleted, nor can its address
5532          * change.
5533          */
5534         if (!IS_CACHE(rbtdb)) {
5535                 rbtdb->origin_node = NULL;
5536                 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
5537                                          &rbtdb->origin_node);
5538                 if (result != ISC_R_SUCCESS) {
5539                         INSIST(result != ISC_R_EXISTS);
5540                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
5541                         return (result);
5542                 }
5543                 /*
5544                  * We need to give the origin node the right locknum.
5545                  */
5546                 dns_name_init(&name, NULL);
5547                 dns_rbt_namefromnode(rbtdb->origin_node, &name);
5548 #ifdef DNS_RBT_USEHASH
5549                 rbtdb->origin_node->locknum =
5550                         rbtdb->origin_node->hashval %
5551                         rbtdb->node_lock_count;
5552 #else
5553                 rbtdb->origin_node->locknum =
5554                         dns_name_hash(&name, ISC_TRUE) %
5555                         rbtdb->node_lock_count;
5556 #endif
5557         }
5558
5559         /*
5560          * Misc. Initialization.
5561          */
5562         result = isc_refcount_init(&rbtdb->references, 1);
5563         if (result != ISC_R_SUCCESS) {
5564                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
5565                 return (result);
5566         }
5567         rbtdb->attributes = 0;
5568         rbtdb->secure = ISC_FALSE;
5569         rbtdb->overmem = ISC_FALSE;
5570         rbtdb->task = NULL;
5571
5572         /*
5573          * Version Initialization.
5574          */
5575         rbtdb->current_serial = 1;
5576         rbtdb->least_serial = 1;
5577         rbtdb->next_serial = 2;
5578         rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
5579         if (rbtdb->current_version == NULL) {
5580                 isc_refcount_decrement(&rbtdb->references, NULL);
5581                 isc_refcount_destroy(&rbtdb->references);
5582                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
5583                 return (ISC_R_NOMEMORY);
5584         }
5585         rbtdb->future_version = NULL;
5586         ISC_LIST_INIT(rbtdb->open_versions);
5587         /*
5588          * Keep the current version in the open list so that list operation
5589          * won't happen in normal lookup operations.
5590          */
5591         PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
5592
5593         rbtdb->common.magic = DNS_DB_MAGIC;
5594         rbtdb->common.impmagic = RBTDB_MAGIC;
5595
5596         *dbp = (dns_db_t *)rbtdb;
5597
5598         return (ISC_R_SUCCESS);
5599
5600  cleanup_node_locks:
5601         isc_mem_put(mctx, rbtdb->node_locks,
5602                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
5603
5604  cleanup_tree_lock:
5605         isc_rwlock_destroy(&rbtdb->tree_lock);
5606
5607  cleanup_lock:
5608         RBTDB_DESTROYLOCK(&rbtdb->lock);
5609
5610  cleanup_rbtdb:
5611         isc_mem_put(mctx, rbtdb,  sizeof(*rbtdb));
5612         return (result);
5613 }
5614
5615
5616 /*
5617  * Slabbed Rdataset Methods
5618  */
5619
5620 static void
5621 rdataset_disassociate(dns_rdataset_t *rdataset) {
5622         dns_db_t *db = rdataset->private1;
5623         dns_dbnode_t *node = rdataset->private2;
5624
5625         detachnode(db, &node);
5626 }
5627
5628 static isc_result_t
5629 rdataset_first(dns_rdataset_t *rdataset) {
5630         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
5631         unsigned int count;
5632
5633         count = raw[0] * 256 + raw[1];
5634         if (count == 0) {
5635                 rdataset->private5 = NULL;
5636                 return (ISC_R_NOMORE);
5637         }
5638         
5639         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
5640                 raw += 2 + (4 * count);
5641         else
5642                 raw += 2;
5643
5644         /*
5645          * The privateuint4 field is the number of rdata beyond the
5646          * cursor position, so we decrement the total count by one
5647          * before storing it.
5648          *
5649          * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
5650          * first record.  If DNS_RDATASETATTR_LOADORDER is set 'raw' points
5651          * to the first entry in the offset table.
5652          */
5653         count--;
5654         rdataset->privateuint4 = count;
5655         rdataset->private5 = raw;
5656
5657         return (ISC_R_SUCCESS);
5658 }
5659
5660 static isc_result_t
5661 rdataset_next(dns_rdataset_t *rdataset) {
5662         unsigned int count;
5663         unsigned int length;
5664         unsigned char *raw;     /* RDATASLAB */
5665
5666         count = rdataset->privateuint4;
5667         if (count == 0)
5668                 return (ISC_R_NOMORE);
5669         count--;
5670         rdataset->privateuint4 = count;
5671
5672         /*
5673          * Skip forward one record (length + 4) or one offset (4).
5674          */
5675         raw = rdataset->private5;
5676         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
5677                 length = raw[0] * 256 + raw[1];
5678                 raw += length;
5679         }
5680         rdataset->private5 = raw + 4;
5681
5682         return (ISC_R_SUCCESS);
5683 }
5684
5685 static void
5686 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
5687         unsigned char *raw = rdataset->private5;        /* RDATASLAB */
5688         unsigned int offset;
5689         isc_region_t r;
5690
5691         REQUIRE(raw != NULL);
5692
5693         /*
5694          * Find the start of the record if not already in private5
5695          * then skip the length and order fields.
5696          */
5697         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
5698                 offset = (raw[0] << 24) + (raw[1] << 16) +
5699                          (raw[2] << 8) + raw[3];
5700                 raw = rdataset->private3;
5701                 raw += offset;
5702         }
5703         r.length = raw[0] * 256 + raw[1];
5704         raw += 4;
5705         r.base = raw;
5706         dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
5707 }
5708
5709 static void
5710 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
5711         dns_db_t *db = source->private1;
5712         dns_dbnode_t *node = source->private2;
5713         dns_dbnode_t *cloned_node = NULL;
5714
5715         attachnode(db, node, &cloned_node);
5716         *target = *source;
5717
5718         /*
5719          * Reset iterator state.
5720          */
5721         target->privateuint4 = 0;
5722         target->private5 = NULL;
5723 }
5724
5725 static unsigned int
5726 rdataset_count(dns_rdataset_t *rdataset) {
5727         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
5728         unsigned int count;
5729
5730         count = raw[0] * 256 + raw[1];
5731
5732         return (count);
5733 }
5734
5735 static isc_result_t
5736 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
5737                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
5738 {
5739         dns_db_t *db = rdataset->private1;
5740         dns_dbnode_t *node = rdataset->private2;
5741         dns_dbnode_t *cloned_node;
5742         struct noqname *noqname = rdataset->private6;
5743
5744         cloned_node = NULL;
5745         attachnode(db, node, &cloned_node);
5746         nsec->methods = &rdataset_methods;
5747         nsec->rdclass = db->rdclass;
5748         nsec->type = dns_rdatatype_nsec;
5749         nsec->covers = 0;
5750         nsec->ttl = rdataset->ttl;
5751         nsec->trust = rdataset->trust;
5752         nsec->private1 = rdataset->private1;
5753         nsec->private2 = rdataset->private2;
5754         nsec->private3 = noqname->nsec;
5755         nsec->privateuint4 = 0;
5756         nsec->private5 = NULL;
5757         nsec->private6 = NULL;
5758
5759         cloned_node = NULL;
5760         attachnode(db, node, &cloned_node);
5761         nsecsig->methods = &rdataset_methods;
5762         nsecsig->rdclass = db->rdclass;
5763         nsecsig->type = dns_rdatatype_rrsig;
5764         nsecsig->covers = dns_rdatatype_nsec;
5765         nsecsig->ttl = rdataset->ttl;
5766         nsecsig->trust = rdataset->trust;
5767         nsecsig->private1 = rdataset->private1;
5768         nsecsig->private2 = rdataset->private2;
5769         nsecsig->private3 = noqname->nsecsig;
5770         nsecsig->privateuint4 = 0;
5771         nsecsig->private5 = NULL;
5772         nsec->private6 = NULL;
5773
5774         dns_name_clone(&noqname->name, name);
5775
5776         return (ISC_R_SUCCESS);
5777 }
5778
5779 /*
5780  * Rdataset Iterator Methods
5781  */
5782
5783 static void
5784 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
5785         rbtdb_rdatasetiter_t *rbtiterator;
5786
5787         rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
5788
5789         if (rbtiterator->common.version != NULL)
5790                 closeversion(rbtiterator->common.db,
5791                              &rbtiterator->common.version, ISC_FALSE);
5792         detachnode(rbtiterator->common.db, &rbtiterator->common.node);
5793         isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
5794                     sizeof(*rbtiterator));
5795
5796         *iteratorp = NULL;
5797 }
5798
5799 static isc_result_t
5800 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
5801         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
5802         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
5803         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
5804         rbtdb_version_t *rbtversion = rbtiterator->common.version;
5805         rdatasetheader_t *header, *top_next;
5806         rbtdb_serial_t serial;
5807         isc_stdtime_t now;
5808
5809         if (IS_CACHE(rbtdb)) {
5810                 serial = 1;
5811                 now = rbtiterator->common.now;
5812         } else {
5813                 serial = rbtversion->serial;
5814                 now = 0;
5815         }
5816
5817         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5818                   isc_rwlocktype_read);
5819
5820         for (header = rbtnode->data; header != NULL; header = top_next) {
5821                 top_next = header->next;
5822                 do {
5823                         if (header->serial <= serial && !IGNORE(header)) {
5824                                 /*
5825                                  * Is this a "this rdataset doesn't exist"
5826                                  * record?  Or is it too old in the cache?
5827                                  *
5828                                  * Note: unlike everywhere else, we
5829                                  * check for now > header->ttl instead
5830                                  * of now >= header->ttl.  This allows
5831                                  * ANY and RRSIG queries for 0 TTL
5832                                  * rdatasets to work.
5833                                  */
5834                                 if (NONEXISTENT(header) ||
5835                                     (now != 0 && now > header->ttl))
5836                                         header = NULL;
5837                                 break;
5838                         } else
5839                                 header = header->down;
5840                 } while (header != NULL);
5841                 if (header != NULL)
5842                         break;
5843         }
5844
5845         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5846                     isc_rwlocktype_read);
5847
5848         rbtiterator->current = header;
5849
5850         if (header == NULL)
5851                 return (ISC_R_NOMORE);
5852
5853         return (ISC_R_SUCCESS);
5854 }
5855
5856 static isc_result_t
5857 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
5858         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
5859         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
5860         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
5861         rbtdb_version_t *rbtversion = rbtiterator->common.version;
5862         rdatasetheader_t *header, *top_next;
5863         rbtdb_serial_t serial;
5864         isc_stdtime_t now;
5865         rbtdb_rdatatype_t type, negtype;
5866         dns_rdatatype_t rdtype, covers;
5867
5868         header = rbtiterator->current;
5869         if (header == NULL)
5870                 return (ISC_R_NOMORE);
5871
5872         if (IS_CACHE(rbtdb)) {
5873                 serial = 1;
5874                 now = rbtiterator->common.now;
5875         } else {
5876                 serial = rbtversion->serial;
5877                 now = 0;
5878         }
5879
5880         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5881                   isc_rwlocktype_read);
5882
5883         type = header->type;
5884         rdtype = RBTDB_RDATATYPE_BASE(header->type);
5885         if (rdtype == 0) {
5886                 covers = RBTDB_RDATATYPE_EXT(header->type);
5887                 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5888         } else 
5889                 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5890         for (header = header->next; header != NULL; header = top_next) {
5891                 top_next = header->next;
5892                 /*
5893                  * If not walking back up the down list.
5894                  */
5895                 if (header->type != type && header->type != negtype) {
5896                         do {
5897                                 if (header->serial <= serial &&
5898                                     !IGNORE(header)) {
5899                                         /*
5900                                          * Is this a "this rdataset doesn't
5901                                          * exist" record?
5902                                          *
5903                                          * Note: unlike everywhere else, we
5904                                          * check for now > header->ttl instead
5905                                          * of now >= header->ttl.  This allows
5906                                          * ANY and RRSIG queries for 0 TTL
5907                                          * rdatasets to work.
5908                                          */
5909                                         if ((header->attributes &
5910                                              RDATASET_ATTR_NONEXISTENT) != 0 ||
5911                                             (now != 0 && now > header->ttl))
5912                                                 header = NULL;
5913                                         break;
5914                                 } else
5915                                         header = header->down;
5916                         } while (header != NULL);
5917                         if (header != NULL)
5918                                 break;
5919                 }
5920         }
5921
5922         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5923                     isc_rwlocktype_read);
5924
5925         rbtiterator->current = header;
5926
5927         if (header == NULL)
5928                 return (ISC_R_NOMORE);
5929
5930         return (ISC_R_SUCCESS);
5931 }
5932
5933 static void
5934 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
5935         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
5936         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
5937         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
5938         rdatasetheader_t *header;
5939
5940         header = rbtiterator->current;
5941         REQUIRE(header != NULL);
5942
5943         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5944                   isc_rwlocktype_read);
5945
5946         bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
5947                       rdataset);
5948
5949         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5950                     isc_rwlocktype_read);
5951 }
5952
5953
5954 /*
5955  * Database Iterator Methods
5956  */
5957
5958 static inline void
5959 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
5960         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
5961         dns_rbtnode_t *node = rbtdbiter->node;
5962
5963         if (node == NULL)
5964                 return;
5965
5966         INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
5967         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
5968         new_reference(rbtdb, node);
5969         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
5970 }
5971
5972 static inline void
5973 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
5974         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
5975         dns_rbtnode_t *node = rbtdbiter->node;
5976         nodelock_t *lock;
5977
5978         if (node == NULL)
5979                 return;
5980
5981         lock = &rbtdb->node_locks[node->locknum].lock;
5982         NODE_LOCK(lock, isc_rwlocktype_read);
5983         decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
5984                             rbtdbiter->tree_locked);
5985         NODE_UNLOCK(lock, isc_rwlocktype_read);
5986
5987         rbtdbiter->node = NULL;
5988 }
5989
5990 static void
5991 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
5992         dns_rbtnode_t *node;
5993         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
5994         isc_boolean_t was_read_locked = ISC_FALSE;
5995         nodelock_t *lock;
5996         int i;
5997
5998         if (rbtdbiter->delete != 0) {
5999                 /*
6000                  * Note that "%d node of %d in tree" can report things like
6001                  * "flush_deletions: 59 nodes of 41 in tree".  This means
6002                  * That some nodes appear on the deletions list more than
6003                  * once.  Only the last occurence will actually be deleted.
6004                  */
6005                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
6006                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
6007                               "flush_deletions: %d nodes of %d in tree",
6008                               rbtdbiter->delete,
6009                               dns_rbt_nodecount(rbtdb->tree));
6010
6011                 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
6012                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6013                         was_read_locked = ISC_TRUE;
6014                 }
6015                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6016                 rbtdbiter->tree_locked = isc_rwlocktype_write;
6017
6018                 for (i = 0; i < rbtdbiter->delete; i++) {
6019                         node = rbtdbiter->deletions[i];
6020                         lock = &rbtdb->node_locks[node->locknum].lock;
6021
6022                         NODE_LOCK(lock, isc_rwlocktype_read);
6023                         decrement_reference(rbtdb, node, 0,
6024                                             isc_rwlocktype_read,
6025                                             rbtdbiter->tree_locked);
6026                         NODE_UNLOCK(lock, isc_rwlocktype_read);
6027                 }
6028
6029                 rbtdbiter->delete = 0;
6030
6031                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6032                 if (was_read_locked) {
6033                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6034                         rbtdbiter->tree_locked = isc_rwlocktype_read;
6035
6036                 } else {
6037                         rbtdbiter->tree_locked = isc_rwlocktype_none;
6038                 }
6039         }
6040 }
6041
6042 static inline void
6043 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
6044         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
6045
6046         REQUIRE(rbtdbiter->paused);
6047         REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
6048
6049         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6050         rbtdbiter->tree_locked = isc_rwlocktype_read;
6051
6052         rbtdbiter->paused = ISC_FALSE;
6053 }
6054
6055 static void
6056 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
6057         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
6058         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
6059         dns_db_t *db = NULL;
6060
6061         if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
6062                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6063                 rbtdbiter->tree_locked = isc_rwlocktype_none;
6064         } else
6065                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
6066
6067         dereference_iter_node(rbtdbiter);
6068
6069         flush_deletions(rbtdbiter);
6070
6071         dns_db_attach(rbtdbiter->common.db, &db);
6072         dns_db_detach(&rbtdbiter->common.db);
6073
6074         dns_rbtnodechain_reset(&rbtdbiter->chain);
6075         isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
6076         dns_db_detach(&db);
6077
6078         *iteratorp = NULL;
6079 }
6080
6081 static isc_result_t
6082 dbiterator_first(dns_dbiterator_t *iterator) {
6083         isc_result_t result;
6084         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6085         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
6086         dns_name_t *name, *origin;
6087
6088         if (rbtdbiter->result != ISC_R_SUCCESS &&
6089             rbtdbiter->result != ISC_R_NOMORE)
6090                 return (rbtdbiter->result);
6091
6092         if (rbtdbiter->paused)
6093                 resume_iteration(rbtdbiter);
6094
6095         dereference_iter_node(rbtdbiter);
6096
6097         name = dns_fixedname_name(&rbtdbiter->name);
6098         origin = dns_fixedname_name(&rbtdbiter->origin);
6099         dns_rbtnodechain_reset(&rbtdbiter->chain);
6100
6101         result = dns_rbtnodechain_first(&rbtdbiter->chain, rbtdb->tree, name,
6102                                         origin);
6103
6104         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
6105                 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
6106                                                   NULL, &rbtdbiter->node);
6107                 if (result == ISC_R_SUCCESS) {
6108                         rbtdbiter->new_origin = ISC_TRUE;
6109                         reference_iter_node(rbtdbiter);
6110                 }
6111         } else {
6112                 INSIST(result == ISC_R_NOTFOUND);
6113                 result = ISC_R_NOMORE; /* The tree is empty. */
6114         }
6115
6116         rbtdbiter->result = result;
6117
6118         return (result);
6119 }
6120
6121 static isc_result_t
6122 dbiterator_last(dns_dbiterator_t *iterator) {
6123         isc_result_t result;
6124         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6125         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
6126         dns_name_t *name, *origin;
6127
6128         if (rbtdbiter->result != ISC_R_SUCCESS &&
6129             rbtdbiter->result != ISC_R_NOMORE)
6130                 return (rbtdbiter->result);
6131
6132         if (rbtdbiter->paused)
6133                 resume_iteration(rbtdbiter);
6134
6135         dereference_iter_node(rbtdbiter);
6136
6137         name = dns_fixedname_name(&rbtdbiter->name);
6138         origin = dns_fixedname_name(&rbtdbiter->origin);
6139         dns_rbtnodechain_reset(&rbtdbiter->chain);
6140
6141         result = dns_rbtnodechain_last(&rbtdbiter->chain, rbtdb->tree, name,
6142                                        origin);
6143         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
6144                 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
6145                                                   NULL, &rbtdbiter->node);
6146                 if (result == ISC_R_SUCCESS) {
6147                         rbtdbiter->new_origin = ISC_TRUE;
6148                         reference_iter_node(rbtdbiter);
6149                 }
6150         } else {
6151                 INSIST(result == ISC_R_NOTFOUND);
6152                 result = ISC_R_NOMORE; /* The tree is empty. */
6153         }
6154
6155         rbtdbiter->result = result;
6156
6157         return (result);
6158 }
6159
6160 static isc_result_t
6161 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
6162         isc_result_t result;
6163         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6164         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
6165         dns_name_t *iname, *origin;
6166
6167         if (rbtdbiter->result != ISC_R_SUCCESS &&
6168             rbtdbiter->result != ISC_R_NOMORE)
6169                 return (rbtdbiter->result);
6170
6171         if (rbtdbiter->paused)
6172                 resume_iteration(rbtdbiter);
6173
6174         dereference_iter_node(rbtdbiter);
6175
6176         iname = dns_fixedname_name(&rbtdbiter->name);
6177         origin = dns_fixedname_name(&rbtdbiter->origin);
6178         dns_rbtnodechain_reset(&rbtdbiter->chain);
6179
6180         result = dns_rbt_findnode(rbtdb->tree, name, NULL, &rbtdbiter->node,
6181                                   &rbtdbiter->chain, DNS_RBTFIND_EMPTYDATA,
6182                                   NULL, NULL);
6183         if (result == ISC_R_SUCCESS) {
6184                 result = dns_rbtnodechain_current(&rbtdbiter->chain, iname,
6185                                                   origin, NULL);
6186                 if (result == ISC_R_SUCCESS) {
6187                         rbtdbiter->new_origin = ISC_TRUE;
6188                         reference_iter_node(rbtdbiter);
6189                 }
6190
6191         } else if (result == DNS_R_PARTIALMATCH)
6192                 result = ISC_R_NOTFOUND;
6193
6194         rbtdbiter->result = result;
6195
6196         return (result);
6197 }
6198
6199 static isc_result_t
6200 dbiterator_prev(dns_dbiterator_t *iterator) {
6201         isc_result_t result;
6202         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6203         dns_name_t *name, *origin;
6204
6205         REQUIRE(rbtdbiter->node != NULL);
6206
6207         if (rbtdbiter->result != ISC_R_SUCCESS)
6208                 return (rbtdbiter->result);
6209
6210         if (rbtdbiter->paused)
6211                 resume_iteration(rbtdbiter);
6212
6213         name = dns_fixedname_name(&rbtdbiter->name);
6214         origin = dns_fixedname_name(&rbtdbiter->origin);
6215         result = dns_rbtnodechain_prev(&rbtdbiter->chain, name, origin);
6216
6217         dereference_iter_node(rbtdbiter);
6218
6219         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
6220                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
6221                 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
6222                                                   NULL, &rbtdbiter->node);
6223         }
6224
6225         if (result == ISC_R_SUCCESS)
6226                 reference_iter_node(rbtdbiter);
6227
6228         rbtdbiter->result = result;
6229
6230         return (result);
6231 }
6232
6233 static isc_result_t
6234 dbiterator_next(dns_dbiterator_t *iterator) {
6235         isc_result_t result;
6236         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6237         dns_name_t *name, *origin;
6238
6239         REQUIRE(rbtdbiter->node != NULL);
6240
6241         if (rbtdbiter->result != ISC_R_SUCCESS)
6242                 return (rbtdbiter->result);
6243
6244         if (rbtdbiter->paused)
6245                 resume_iteration(rbtdbiter);
6246
6247         name = dns_fixedname_name(&rbtdbiter->name);
6248         origin = dns_fixedname_name(&rbtdbiter->origin);
6249         result = dns_rbtnodechain_next(&rbtdbiter->chain, name, origin);
6250
6251         dereference_iter_node(rbtdbiter);
6252
6253         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
6254                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
6255                 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
6256                                                   NULL, &rbtdbiter->node);
6257         }
6258         if (result == ISC_R_SUCCESS)
6259                 reference_iter_node(rbtdbiter);
6260
6261         rbtdbiter->result = result;
6262
6263         return (result);
6264 }
6265
6266 static isc_result_t
6267 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
6268                    dns_name_t *name)
6269 {
6270         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
6271         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6272         dns_rbtnode_t *node = rbtdbiter->node;
6273         isc_result_t result;
6274         dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
6275         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
6276
6277         REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
6278         REQUIRE(rbtdbiter->node != NULL);
6279
6280         if (rbtdbiter->paused)
6281                 resume_iteration(rbtdbiter);
6282
6283         if (name != NULL) {
6284                 if (rbtdbiter->common.relative_names)
6285                         origin = NULL;
6286                 result = dns_name_concatenate(nodename, origin, name, NULL);
6287                 if (result != ISC_R_SUCCESS)
6288                         return (result);
6289                 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
6290                         result = DNS_R_NEWORIGIN;
6291         } else
6292                 result = ISC_R_SUCCESS;
6293
6294         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
6295         new_reference(rbtdb, node);
6296         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
6297
6298         *nodep = rbtdbiter->node;
6299
6300         if (iterator->cleaning && result == ISC_R_SUCCESS) {
6301                 isc_result_t expire_result;
6302
6303                 /*
6304                  * If the deletion array is full, flush it before trying
6305                  * to expire the current node.  The current node can't
6306                  * fully deleted while the iteration cursor is still on it.
6307                  */
6308                 if (rbtdbiter->delete == DELETION_BATCH_MAX)
6309                         flush_deletions(rbtdbiter);
6310
6311                 expire_result = expirenode(iterator->db, *nodep, 0);
6312
6313                 /*
6314                  * expirenode() currently always returns success.
6315                  */
6316                 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
6317                         unsigned int refs;
6318
6319                         rbtdbiter->deletions[rbtdbiter->delete++] = node;
6320                         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
6321                         dns_rbtnode_refincrement(node, &refs);
6322                         INSIST(refs != 0);
6323                         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
6324                 }
6325         }
6326
6327         return (result);
6328 }
6329
6330 static isc_result_t
6331 dbiterator_pause(dns_dbiterator_t *iterator) {
6332         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
6333         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6334
6335         if (rbtdbiter->result != ISC_R_SUCCESS &&
6336             rbtdbiter->result != ISC_R_NOMORE)
6337                 return (rbtdbiter->result);
6338
6339         if (rbtdbiter->paused)
6340                 return (ISC_R_SUCCESS);
6341
6342         rbtdbiter->paused = ISC_TRUE;
6343
6344         if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
6345                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
6346                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6347                 rbtdbiter->tree_locked = isc_rwlocktype_none;
6348         }
6349
6350         flush_deletions(rbtdbiter);
6351
6352         return (ISC_R_SUCCESS);
6353 }
6354
6355 static isc_result_t
6356 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
6357         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6358         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
6359
6360         if (rbtdbiter->result != ISC_R_SUCCESS)
6361                 return (rbtdbiter->result);
6362
6363         return (dns_name_copy(origin, name, NULL));
6364 }
6365
6366 /*%
6367  * Additional cache routines.
6368  */
6369 static isc_result_t
6370 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
6371                        dns_rdatatype_t qtype, dns_acache_t *acache,
6372                        dns_zone_t **zonep, dns_db_t **dbp,
6373                        dns_dbversion_t **versionp, dns_dbnode_t **nodep,
6374                        dns_name_t *fname, dns_message_t *msg,
6375                        isc_stdtime_t now)
6376 {
6377         dns_rbtdb_t *rbtdb = rdataset->private1;
6378         dns_rbtnode_t *rbtnode = rdataset->private2;
6379         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
6380         unsigned int current_count = rdataset->privateuint4;
6381         unsigned int count;
6382         rdatasetheader_t *header;
6383         nodelock_t *nodelock;
6384         unsigned int total_count;
6385         acachectl_t *acarray;
6386         dns_acacheentry_t *entry;
6387         isc_result_t result;
6388
6389         UNUSED(qtype); /* we do not use this value at least for now */
6390         UNUSED(acache);
6391
6392         header = (struct rdatasetheader *)(raw - sizeof(*header));
6393
6394         total_count = raw[0] * 256 + raw[1];
6395         INSIST(total_count > current_count);
6396         count = total_count - current_count - 1;
6397
6398         acarray = NULL;
6399
6400         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
6401         NODE_LOCK(nodelock, isc_rwlocktype_read);
6402
6403         switch (type) {
6404         case dns_rdatasetadditional_fromauth:
6405                 acarray = header->additional_auth;
6406                 break;
6407         case dns_rdatasetadditional_fromcache:
6408                 acarray = NULL;
6409                 break;
6410         case dns_rdatasetadditional_fromglue:
6411                 acarray = header->additional_glue;
6412                 break;
6413         default:
6414                 INSIST(0);
6415         }
6416
6417         if (acarray == NULL) {
6418                 if (type != dns_rdatasetadditional_fromcache)
6419                         dns_acache_countquerymiss(acache);
6420                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
6421                 return (ISC_R_NOTFOUND);
6422         }
6423
6424         if (acarray[count].entry == NULL) {
6425                 dns_acache_countquerymiss(acache);
6426                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
6427                 return (ISC_R_NOTFOUND);
6428         }
6429
6430         entry = NULL;
6431         dns_acache_attachentry(acarray[count].entry, &entry);
6432
6433         NODE_UNLOCK(nodelock, isc_rwlocktype_read);
6434
6435         result = dns_acache_getentry(entry, zonep, dbp, versionp,
6436                                      nodep, fname, msg, now);
6437
6438         dns_acache_detachentry(&entry);
6439
6440         return (result);
6441 }
6442
6443 static void
6444 acache_callback(dns_acacheentry_t *entry, void **arg) {
6445         dns_rbtdb_t *rbtdb;
6446         dns_rbtnode_t *rbtnode;
6447         nodelock_t *nodelock;
6448         acachectl_t *acarray = NULL;
6449         acache_cbarg_t *cbarg;
6450         unsigned int count;
6451
6452         REQUIRE(arg != NULL);
6453         cbarg = *arg;
6454
6455         /*
6456          * The caller must hold the entry lock.
6457          */
6458
6459         rbtdb = (dns_rbtdb_t *)cbarg->db;
6460         rbtnode = (dns_rbtnode_t *)cbarg->node;
6461
6462         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
6463         NODE_LOCK(nodelock, isc_rwlocktype_write);
6464
6465         switch (cbarg->type) {
6466         case dns_rdatasetadditional_fromauth:
6467                 acarray = cbarg->header->additional_auth;
6468                 break;
6469         case dns_rdatasetadditional_fromglue:
6470                 acarray = cbarg->header->additional_glue;
6471                 break;
6472         default:
6473                 INSIST(0);
6474         }
6475
6476         count = cbarg->count;
6477         if (acarray[count].entry == entry)
6478                 acarray[count].entry = NULL;
6479         INSIST(acarray[count].cbarg != NULL);
6480         isc_mem_put(rbtdb->common.mctx, acarray[count].cbarg,
6481                     sizeof(acache_cbarg_t));
6482         acarray[count].cbarg = NULL;
6483
6484         dns_acache_detachentry(&entry);
6485
6486         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
6487
6488         dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
6489         dns_db_detach((dns_db_t **)(void*)&rbtdb);
6490
6491         *arg = NULL;
6492 }
6493
6494 static void
6495 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
6496                       acache_cbarg_t **cbargp)
6497 {
6498         acache_cbarg_t *cbarg;
6499
6500         REQUIRE(mctx != NULL);
6501         REQUIRE(entry != NULL);
6502         REQUIRE(cbargp != NULL && *cbargp != NULL);
6503
6504         cbarg = *cbargp;
6505
6506         dns_acache_cancelentry(entry);
6507         dns_db_detachnode(cbarg->db, &cbarg->node);
6508         dns_db_detach(&cbarg->db);
6509
6510         isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
6511
6512         *cbargp = NULL;
6513 }
6514
6515 static isc_result_t
6516 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
6517                        dns_rdatatype_t qtype, dns_acache_t *acache,
6518                        dns_zone_t *zone, dns_db_t *db,
6519                        dns_dbversion_t *version, dns_dbnode_t *node,
6520                        dns_name_t *fname)
6521 {
6522         dns_rbtdb_t *rbtdb = rdataset->private1;
6523         dns_rbtnode_t *rbtnode = rdataset->private2;
6524         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
6525         unsigned int current_count = rdataset->privateuint4;
6526         rdatasetheader_t *header;
6527         unsigned int total_count, count;
6528         nodelock_t *nodelock;
6529         isc_result_t result;
6530         acachectl_t *acarray;
6531         dns_acacheentry_t *newentry, *oldentry = NULL;
6532         acache_cbarg_t *newcbarg, *oldcbarg = NULL;
6533
6534         UNUSED(qtype);
6535
6536         if (type == dns_rdatasetadditional_fromcache)
6537                 return (ISC_R_SUCCESS);
6538
6539         header = (struct rdatasetheader *)(raw - sizeof(*header));
6540
6541         total_count = raw[0] * 256 + raw[1];
6542         INSIST(total_count > current_count);
6543         count = total_count - current_count - 1; /* should be private data */
6544
6545         newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
6546         if (newcbarg == NULL)
6547                 return (ISC_R_NOMEMORY);
6548         newcbarg->type = type;
6549         newcbarg->count = count;
6550         newcbarg->header = header;
6551         newcbarg->db = NULL;
6552         dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
6553         newcbarg->node = NULL;
6554         dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
6555                           &newcbarg->node);
6556         newentry = NULL;
6557         result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
6558                                         acache_callback, newcbarg, &newentry);
6559         if (result != ISC_R_SUCCESS)
6560                 goto fail;
6561         /* Set cache data in the new entry. */
6562         result = dns_acache_setentry(acache, newentry, zone, db,
6563                                      version, node, fname);
6564         if (result != ISC_R_SUCCESS)
6565                 goto fail;
6566
6567         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
6568         NODE_LOCK(nodelock, isc_rwlocktype_write);
6569
6570         acarray = NULL;
6571         switch (type) {
6572         case dns_rdatasetadditional_fromauth:
6573                 acarray = header->additional_auth;
6574                 break;
6575         case dns_rdatasetadditional_fromglue:
6576                 acarray = header->additional_glue;
6577                 break;
6578         default:
6579                 INSIST(0);
6580         }
6581
6582         if (acarray == NULL) {
6583                 unsigned int i;
6584
6585                 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
6586                                       sizeof(acachectl_t));
6587
6588                 if (acarray == NULL) {
6589                         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
6590                         goto fail;
6591                 }
6592
6593                 for (i = 0; i < total_count; i++) {
6594                         acarray[i].entry = NULL;
6595                         acarray[i].cbarg = NULL;
6596                 }
6597         }
6598         switch (type) {
6599         case dns_rdatasetadditional_fromauth:
6600                 header->additional_auth = acarray;
6601                 break;
6602         case dns_rdatasetadditional_fromglue:
6603                 header->additional_glue = acarray;
6604                 break;
6605         default:
6606                 INSIST(0);
6607         }
6608
6609         if (acarray[count].entry != NULL) {
6610                 /*
6611                  * Swap the entry.  Delay cleaning-up the old entry since
6612                  * it would require a node lock.
6613                  */
6614                 oldentry = acarray[count].entry;
6615                 INSIST(acarray[count].cbarg != NULL);
6616                 oldcbarg = acarray[count].cbarg;
6617         }
6618         acarray[count].entry = newentry;
6619         acarray[count].cbarg = newcbarg;
6620
6621         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
6622
6623         if (oldentry != NULL) {
6624                 if (oldcbarg != NULL)
6625                         acache_cancelentry(rbtdb->common.mctx, oldentry,
6626                                            &oldcbarg); 
6627                 dns_acache_detachentry(&oldentry);
6628         }
6629
6630         return (ISC_R_SUCCESS);
6631
6632   fail:
6633         if (newcbarg != NULL) {
6634                 if (newentry != NULL) {
6635                         acache_cancelentry(rbtdb->common.mctx, newentry,
6636                                            &newcbarg);
6637                         dns_acache_detachentry(&newentry);
6638                 } else {
6639                         dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
6640                         dns_db_detach(&newcbarg->db);
6641                         isc_mem_put(rbtdb->common.mctx, newcbarg,
6642                             sizeof(*newcbarg));
6643                 }
6644         }
6645
6646         return (result);
6647 }
6648
6649 static isc_result_t
6650 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
6651                        dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
6652
6653         dns_rbtdb_t *rbtdb = rdataset->private1;
6654         dns_rbtnode_t *rbtnode = rdataset->private2;
6655         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
6656         unsigned int current_count = rdataset->privateuint4;
6657         rdatasetheader_t *header;
6658         nodelock_t *nodelock;
6659         unsigned int total_count, count;
6660         acachectl_t *acarray;
6661         dns_acacheentry_t *entry;
6662         acache_cbarg_t *cbarg;
6663
6664         UNUSED(qtype);          /* we do not use this value at least for now */
6665         UNUSED(acache);
6666
6667         if (type == dns_rdatasetadditional_fromcache)
6668                 return (ISC_R_SUCCESS);
6669
6670         header = (struct rdatasetheader *)(raw - sizeof(*header));
6671
6672         total_count = raw[0] * 256 + raw[1];
6673         INSIST(total_count > current_count);
6674         count = total_count - current_count - 1;
6675
6676         acarray = NULL;
6677         entry = NULL;
6678
6679         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
6680         NODE_LOCK(nodelock, isc_rwlocktype_write);
6681
6682         switch (type) {
6683         case dns_rdatasetadditional_fromauth:
6684                 acarray = header->additional_auth;
6685                 break;
6686         case dns_rdatasetadditional_fromglue:
6687                 acarray = header->additional_glue;
6688                 break;
6689         default:
6690                 INSIST(0);
6691         }
6692
6693         if (acarray == NULL) {
6694                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
6695                 return (ISC_R_NOTFOUND);
6696         }
6697
6698         entry = acarray[count].entry;
6699         if (entry == NULL) {
6700                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
6701                 return (ISC_R_NOTFOUND);
6702         }
6703
6704         acarray[count].entry = NULL;
6705         cbarg = acarray[count].cbarg;
6706         acarray[count].cbarg = NULL;
6707
6708         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
6709
6710         if (entry != NULL) {
6711                 if (cbarg != NULL)
6712                         acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
6713                 dns_acache_detachentry(&entry);
6714         }
6715
6716         return (ISC_R_SUCCESS);
6717 }