]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/bind9/lib/dns/rbtdb.c
This commit was generated by cvs2svn to compensate for changes in r177576,
[FreeBSD/FreeBSD.git] / contrib / bind9 / lib / dns / rbtdb.c
1 /*
2  * Copyright (C) 2004-2007  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id: rbtdb.c,v 1.196.18.48 2007/08/28 07:20:04 tbox Exp $ */
19
20 /*! \file */
21
22 /*
23  * Principal Author: Bob Halley
24  */
25
26 #include <config.h>
27
28 #include <isc/event.h>
29 #include <isc/mem.h>
30 #include <isc/print.h>
31 #include <isc/mutex.h>
32 #include <isc/random.h>
33 #include <isc/refcount.h>
34 #include <isc/rwlock.h>
35 #include <isc/string.h>
36 #include <isc/task.h>
37 #include <isc/time.h>
38 #include <isc/util.h>
39
40 #include <dns/acache.h>
41 #include <dns/db.h>
42 #include <dns/dbiterator.h>
43 #include <dns/events.h>
44 #include <dns/fixedname.h>
45 #include <dns/lib.h>
46 #include <dns/log.h>
47 #include <dns/masterdump.h>
48 #include <dns/rbt.h>
49 #include <dns/rdata.h>
50 #include <dns/rdataset.h>
51 #include <dns/rdatasetiter.h>
52 #include <dns/rdataslab.h>
53 #include <dns/result.h>
54 #include <dns/view.h>
55 #include <dns/zone.h>
56 #include <dns/zonekey.h>
57
58 #ifdef DNS_RBTDB_VERSION64
59 #include "rbtdb64.h"
60 #else
61 #include "rbtdb.h"
62 #endif
63
/*
 * Implementation magic number.  The last character differs between the
 * build with DNS_RBTDB_VERSION64 defined ('8') and the default build ('4').
 */
#ifdef DNS_RBTDB_VERSION64
#define RBTDB_MAGIC			ISC_MAGIC('R', 'B', 'D', '8')
#else
#define RBTDB_MAGIC			ISC_MAGIC('R', 'B', 'D', '4')
#endif

/*%
 * Note that "impmagic" is not the first four bytes of the struct, so
 * ISC_MAGIC_VALID cannot be used.
 */
#define VALID_RBTDB(rbtdb)	((rbtdb) != NULL && \
				 (rbtdb)->common.impmagic == RBTDB_MAGIC)
76
/*
 * rbtdb_serial_t is the integer type used for version serial numbers:
 * 64 bits wide when DNS_RBTDB_VERSION64 is defined, 32 bits otherwise.
 */
#ifdef DNS_RBTDB_VERSION64
typedef isc_uint64_t			rbtdb_serial_t;
/*%
 * Make casting easier in symbolic debuggers by using different names
 * for the 64 bit version.
 */
#define dns_rbtdb_t dns_rbtdb64_t
#define rdatasetheader_t rdatasetheader64_t
#define rbtdb_version_t rbtdb_version64_t
#else
typedef isc_uint32_t			rbtdb_serial_t;
#endif
89
/*
 * An rbtdb_rdatatype_t packs two 16-bit values into one 32-bit word:
 * the "base" value in the low 16 bits and the "ext" value in the high
 * 16 bits.
 */
typedef isc_uint32_t			rbtdb_rdatatype_t;

/* Extract the low 16 bits (BASE) or the high 16 bits (EXT). */
#define RBTDB_RDATATYPE_BASE(type)	((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)	((dns_rdatatype_t)((type) >> 16))
/* Build a packed value from base 'b' and ext 'e'. */
#define RBTDB_RDATATYPE_VALUE(b, e)	(((e) << 16) | (b))

/*
 * Pre-built packed values: base rrsig combined with ext nsec, ns, cname
 * and dname respectively.
 */
#define RBTDB_RDATATYPE_SIGNSEC \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
#define RBTDB_RDATATYPE_SIGNS \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
#define RBTDB_RDATATYPE_SIGCNAME \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
#define RBTDB_RDATATYPE_SIGDNAME \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
/* Packed value with base 0 and ext "any". */
#define RBTDB_RDATATYPE_NCACHEANY \
		RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
106
107 /*
108  * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
109  * Using rwlock is effective with regard to lookup performance only when
110  * it is implemented in an efficient way.
111  * Otherwise, it is generally wise to stick to the simple locking since rwlock
112  * would require more memory or can even make lookups slower due to its own
113  * overhead (when it internally calls mutex locks).
114  */
/* DNS_RBTDB_USERWLOCK is 1 exactly when ISC_RWLOCK_USEATOMIC is defined. */
#ifdef ISC_RWLOCK_USEATOMIC
#define DNS_RBTDB_USERWLOCK 1
#else
#define DNS_RBTDB_USERWLOCK 0
#endif

/*
 * Database lock wrappers.  In the rwlock build the lock type 't' is
 * passed through; in the mutex build 't' is accepted but ignored.
 */
#if DNS_RBTDB_USERWLOCK
#define RBTDB_INITLOCK(l)	isc_rwlock_init((l), 0, 0)
#define RBTDB_DESTROYLOCK(l)	isc_rwlock_destroy(l)
#define RBTDB_LOCK(l, t)	RWLOCK((l), (t))
#define RBTDB_UNLOCK(l, t)	RWUNLOCK((l), (t))
#else
#define RBTDB_INITLOCK(l)	isc_mutex_init(l)
#define RBTDB_DESTROYLOCK(l)	DESTROYLOCK(l)
#define RBTDB_LOCK(l, t)	LOCK(l)
#define RBTDB_UNLOCK(l, t)	UNLOCK(l)
#endif
132
133 /*
134  * Since node locking is sensitive to both performance and memory footprint,
135  * we need some trick here.  If we have both high-performance rwlock and
136  * high performance and small-memory reference counters, we use rwlock for
137  * node lock and isc_refcount for node references.  In this case, we don't have
138  * to protect the access to the counters by locks.
139  * Otherwise, we simply use ordinary mutex lock for node locking, and use
140  * simple integers as reference counters which is protected by the lock.
141  * In most cases, we can simply use wrapper macros such as NODE_LOCK and
142  * NODE_UNLOCK.  In some other cases, however, we need to protect reference
143  * counters first and then protect other parts of a node as read-only data.
144  * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
145  * provided for these special cases.  When we can use the efficient backend
146  * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
147  * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
148  * section including the access to the reference counter.
149  * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
150  * section is also protected by NODE_STRONGLOCK().
151  */
#if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
/*
 * rwlock build: the node lock is an rwlock.  The STRONG variants expand
 * to nothing and the WEAK variants take/release the rwlock.
 */
typedef isc_rwlock_t nodelock_t;

#define NODE_INITLOCK(l)	isc_rwlock_init((l), 0, 0)
#define NODE_DESTROYLOCK(l)	isc_rwlock_destroy(l)
#define NODE_LOCK(l, t)		RWLOCK((l), (t))
#define NODE_UNLOCK(l, t)	RWUNLOCK((l), (t))
#define NODE_TRYUPGRADE(l)	isc_rwlock_tryupgrade(l)

#define NODE_STRONGLOCK(l)	((void)0)
#define NODE_STRONGUNLOCK(l)	((void)0)
#define NODE_WEAKLOCK(l, t)	NODE_LOCK(l, t)
#define NODE_WEAKUNLOCK(l, t)	NODE_UNLOCK(l, t)
#define NODE_WEAKDOWNGRADE(l)	isc_rwlock_downgrade(l)
#else
/*
 * Mutex build: the node lock is a plain mutex and the lock type 't' is
 * ignored.  The STRONG variants take/release the mutex, the WEAK
 * variants expand to nothing, and NODE_TRYUPGRADE always reports
 * ISC_R_SUCCESS.
 */
typedef isc_mutex_t nodelock_t;

#define NODE_INITLOCK(l)	isc_mutex_init(l)
#define NODE_DESTROYLOCK(l)	DESTROYLOCK(l)
#define NODE_LOCK(l, t)		LOCK(l)
#define NODE_UNLOCK(l, t)	UNLOCK(l)
#define NODE_TRYUPGRADE(l)	ISC_R_SUCCESS

#define NODE_STRONGLOCK(l)	LOCK(l)
#define NODE_STRONGUNLOCK(l)	UNLOCK(l)
#define NODE_WEAKLOCK(l, t)	((void)0)
#define NODE_WEAKUNLOCK(l, t)	((void)0)
#define NODE_WEAKDOWNGRADE(l)	((void)0)
#endif
181
/* Default DNS_RDATASET_FIXED to 1 unless the build already defined it. */
#ifndef DNS_RDATASET_FIXED
#define DNS_RDATASET_FIXED 1
#endif

/*
 * Allow clients with a virtual time of up to 5 minutes in the past to see
 * records that would otherwise have expired.  The value is in seconds.
 */
#define RBTDB_VIRTUAL 300
191
/*
 * A name together with two opaque data pointers.  rdatasetheader_t
 * carries an optional pointer to one of these in its 'noqname' field.
 * NOTE(review): 'nsec' and 'nsecsig' presumably hold the NSEC rdataset
 * and its signature in slab form -- confirm against the code that fills
 * them in.
 */
struct noqname {
	dns_name_t name;
	void *     nsec;
	void *     nsecsig;
};
197
/* Forward declaration; struct acachectl is defined further down. */
typedef struct acachectl acachectl_t;

/*
 * Header describing one rdataset.  Headers are linked through the plain
 * 'next' and 'down' pointers described below rather than through the
 * ISC LIST macros.
 */
typedef struct rdatasetheader {
	/*%
	 * Locked by the owning node's lock.
	 */
	rbtdb_serial_t			serial;
	dns_ttl_t			ttl;
	rbtdb_rdatatype_t		type;
	isc_uint16_t			attributes;	/* RDATASET_ATTR_* bits */
	dns_trust_t			trust;
	struct noqname			*noqname;
	/*%<
	 * We don't use the LIST macros, because the LIST structure has
	 * both head and tail pointers, and is doubly linked.
	 */

	struct rdatasetheader		*next;
	/*%<
	 * If this is the top header for an rdataset, 'next' points
	 * to the top header for the next rdataset (i.e., the next type).
	 * Otherwise, it points up to the header whose down pointer points
	 * at this header.
	 */

	struct rdatasetheader		*down;
	/*%<
	 * Points to the header for the next older version of
	 * this rdataset.
	 */

	isc_uint32_t			count;
	/*%<
	 * Monotonically increased every time this rdataset is bound so that
	 * it is used as the base of the starting point in DNS responses
	 * when the "cyclic" rrset-order is required.  Since the ordering
	 * should not be so crucial, no lock is set for the counter for
	 * performance reasons.
	 */

	acachectl_t			*additional_auth;
	acachectl_t			*additional_glue;
} rdatasetheader_t;
241
/*
 * Bit flags stored in rdatasetheader_t.attributes; tested through the
 * EXISTS(), NONEXISTENT(), IGNORE(), RETAIN() and NXDOMAIN() macros.
 */
#define RDATASET_ATTR_NONEXISTENT	0x0001
#define RDATASET_ATTR_STALE		0x0002
#define RDATASET_ATTR_IGNORE		0x0004
#define RDATASET_ATTR_RETAIN		0x0008
#define RDATASET_ATTR_NXDOMAIN		0x0010
247
/*
 * Argument record that ties an additional-cache entry back to the
 * database, node and rdataset header it belongs to.
 * NOTE(review): presumably handed to the acache as callback data --
 * confirm against the code that registers the entry.
 */
typedef struct acache_cbarg {
	dns_rdatasetadditional_t	type;
	unsigned int			count;
	dns_db_t			*db;
	dns_dbnode_t			*node;
	rdatasetheader_t		*header;
} acache_cbarg_t;

/*
 * Pairs an acache entry with its callback argument.  rdatasetheader_t
 * points to these through 'additional_auth' and 'additional_glue'.
 */
struct acachectl {
	dns_acacheentry_t		*entry;
	acache_cbarg_t			*cbarg;
};
260
261 /*
262  * XXX
263  * When the cache will pre-expire data (due to memory low or other
264  * situations) before the rdataset's TTL has expired, it MUST
265  * respect the RETAIN bit and not expire the data until its TTL is
266  * expired.
267  */
268
#undef IGNORE			/* WIN32 winbase.h defines this. */

/*
 * Predicates on a header's 'attributes' bit field.  EXISTS() and
 * NONEXISTENT() are exact complements of each other; the others test a
 * single RDATASET_ATTR_* bit.
 */
#define EXISTS(header) \
	(((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
#define NONEXISTENT(header) \
	(((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
#define IGNORE(header) \
	(((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
#define RETAIN(header) \
	(((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
#define NXDOMAIN(header) \
	(((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
281
/*
 * Default sizes for the node lock array.
 * NOTE(review): the second value is presumably used for cache
 * databases, as its name suggests -- confirm at the point of use.
 */
#define DEFAULT_NODE_LOCK_COUNT		7	/*%< Should be prime. */
#define DEFAULT_CACHE_NODE_LOCK_COUNT	1009	/*%< Should be prime. */

/*
 * One element of dns_rbtdb_t.node_locks: a lock, a reference counter and
 * an 'exiting' flag that maybe_free_rbtdb() sets while holding the lock.
 */
typedef struct {
	nodelock_t			lock;
	/* Protected in the refcount routines. */
	isc_refcount_t			references;
	/* Locked by lock. */
	isc_boolean_t			exiting;
} rbtdb_nodelock_t;
292
/*
 * Entry on a version's changed_list: a node pointer plus a 'dirty' flag.
 * NOTE(review): presumably one entry per node modified in that version
 * -- confirm in add_changed().
 */
typedef struct rbtdb_changed {
	dns_rbtnode_t *			node;
	isc_boolean_t			dirty;
	ISC_LINK(struct rbtdb_changed)	link;
} rbtdb_changed_t;

typedef ISC_LIST(rbtdb_changed_t)	rbtdb_changedlist_t;
300
/*
 * One version of the database, identified by 'serial'.  Instances are
 * created by allocate_version() and linked into
 * dns_rbtdb_t.open_versions through 'link'.
 */
typedef struct rbtdb_version {
	/* Not locked */
	rbtdb_serial_t			serial;
	/*
	 * Protected in the refcount routines.
	 * XXXJT: should we change the lock policy based on the refcount
	 * performance?
	 */
	isc_refcount_t			references;
	/* Locked by database lock. */
	isc_boolean_t			writer;
	isc_boolean_t			commit_ok;
	rbtdb_changedlist_t		changed_list;
	ISC_LINK(struct rbtdb_version)	link;
} rbtdb_version_t;

typedef ISC_LIST(rbtdb_version_t)	rbtdb_versionlist_t;
318
/*
 * The red-black-tree database object.  'common' is the generic dns_db_t
 * part; VALID_RBTDB() checks its 'impmagic' field.  The remaining
 * fields are grouped by the lock that protects them.
 */
typedef struct {
	/* Unlocked. */
	dns_db_t			common;
#if DNS_RBTDB_USERWLOCK
	isc_rwlock_t			lock;
#else
	isc_mutex_t			lock;
#endif
	isc_rwlock_t			tree_lock;
	unsigned int			node_lock_count;	/* length of node_locks[] */
	rbtdb_nodelock_t *		node_locks;
	dns_rbtnode_t *			origin_node;
	/* Locked by lock. */
	unsigned int			active;
	/*
	 * NOTE(review): attach() and detach() update 'references' through
	 * the isc_refcount routines without taking 'lock' -- confirm that
	 * the "Locked by lock" grouping is still meant to cover it.
	 */
	isc_refcount_t			references;
	unsigned int			attributes;	/* RBTDB_ATTR_* bits */
	rbtdb_serial_t			current_serial;
	rbtdb_serial_t			least_serial;
	rbtdb_serial_t			next_serial;
	rbtdb_version_t *		current_version;
	rbtdb_version_t *		future_version;
	rbtdb_versionlist_t		open_versions;
	isc_boolean_t			overmem;
	isc_task_t *			task;
	dns_dbnode_t			*soanode;
	dns_dbnode_t			*nsnode;
	/* Locked by tree_lock. */
	dns_rbt_t *			tree;
	isc_boolean_t			secure;

	/* Unlocked */
	unsigned int			quantum;	/* passed to dns_rbt_destroy2() by free_rbtdb() */
} dns_rbtdb_t;

/* Bit flags for the 'attributes' field of dns_rbtdb_t. */
#define RBTDB_ATTR_LOADED		0x01
#define RBTDB_ATTR_LOADING		0x02
355
/*%
 * Search Context
 *
 * Bundles the database, version and serial being searched with the
 * lookup options, the node chain and a timestamp.
 * NOTE(review): the zonecut* fields presumably record a zone cut met
 * during the search -- confirm in the find routines.
 */
typedef struct {
	dns_rbtdb_t *		rbtdb;
	rbtdb_version_t *	rbtversion;
	rbtdb_serial_t		serial;
	unsigned int		options;
	dns_rbtnodechain_t	chain;
	isc_boolean_t		copy_name;
	isc_boolean_t		need_cleanup;
	isc_boolean_t		wild;
	dns_rbtnode_t *		zonecut;
	rdatasetheader_t *	zonecut_rdataset;
	rdatasetheader_t *	zonecut_sigrdataset;
	dns_fixedname_t		zonecut_name;
	isc_stdtime_t		now;
} rbtdb_search_t;
374
/*%
 * Load Context
 *
 * The database being loaded together with a timestamp.
 */
typedef struct {
	dns_rbtdb_t *		rbtdb;
	isc_stdtime_t		now;
} rbtdb_load_t;
382
/*
 * Forward declarations of the rdataset method implementations that are
 * installed in the rdataset_methods table below.
 */
static void rdataset_disassociate(dns_rdataset_t *rdataset);
static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
static unsigned int rdataset_count(dns_rdataset_t *rdataset);
static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
					dns_name_t *name,
					dns_rdataset_t *nsec,
					dns_rdataset_t *nsecsig);
static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
					   dns_rdatasetadditional_t type,
					   dns_rdatatype_t qtype,
					   dns_acache_t *acache,
					   dns_zone_t **zonep,
					   dns_db_t **dbp,
					   dns_dbversion_t **versionp,
					   dns_dbnode_t **nodep,
					   dns_name_t *fname,
					   dns_message_t *msg,
					   isc_stdtime_t now);
static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
					   dns_rdatasetadditional_t type,
					   dns_rdatatype_t qtype,
					   dns_acache_t *acache,
					   dns_zone_t *zone,
					   dns_db_t *db,
					   dns_dbversion_t *version,
					   dns_dbnode_t *node,
					   dns_name_t *fname);
static isc_result_t rdataset_putadditional(dns_acache_t *acache,
					   dns_rdataset_t *rdataset,
					   dns_rdatasetadditional_t type,
					   dns_rdatatype_t qtype);

/*
 * Method table for rdatasets.  Entries are positional; the NULL entry
 * is a slot for which this file supplies no implementation.
 */
static dns_rdatasetmethods_t rdataset_methods = {
	rdataset_disassociate,
	rdataset_first,
	rdataset_next,
	rdataset_current,
	rdataset_clone,
	rdataset_count,
	NULL,
	rdataset_getnoqname,
	rdataset_getadditional,
	rdataset_setadditional,
	rdataset_putadditional
};
431
/*
 * Forward declarations of the rdataset iterator methods installed in
 * the rdatasetiter_methods table below.
 */
static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
				 dns_rdataset_t *rdataset);

/* Method table for rdataset iterators; entries are positional. */
static dns_rdatasetitermethods_t rdatasetiter_methods = {
	rdatasetiter_destroy,
	rdatasetiter_first,
	rdatasetiter_next,
	rdatasetiter_current
};

/*
 * Rdataset iterator: the generic iterator part followed by a pointer to
 * the header the iterator is currently positioned on.
 */
typedef struct rbtdb_rdatasetiter {
	dns_rdatasetiter_t		common;
	rdatasetheader_t *		current;
} rbtdb_rdatasetiter_t;
449
/*
 * Forward declarations of the database iterator methods installed in
 * the dbiterator_methods table below.
 */
static void		dbiterator_destroy(dns_dbiterator_t **iteratorp);
static isc_result_t	dbiterator_first(dns_dbiterator_t *iterator);
static isc_result_t	dbiterator_last(dns_dbiterator_t *iterator);
static isc_result_t	dbiterator_seek(dns_dbiterator_t *iterator,
					dns_name_t *name);
static isc_result_t	dbiterator_prev(dns_dbiterator_t *iterator);
static isc_result_t	dbiterator_next(dns_dbiterator_t *iterator);
static isc_result_t	dbiterator_current(dns_dbiterator_t *iterator,
					   dns_dbnode_t **nodep,
					   dns_name_t *name);
static isc_result_t	dbiterator_pause(dns_dbiterator_t *iterator);
static isc_result_t	dbiterator_origin(dns_dbiterator_t *iterator,
					  dns_name_t *name);

/* Method table for database iterators; entries are positional. */
static dns_dbiteratormethods_t dbiterator_methods = {
	dbiterator_destroy,
	dbiterator_first,
	dbiterator_last,
	dbiterator_seek,
	dbiterator_prev,
	dbiterator_next,
	dbiterator_current,
	dbiterator_pause,
	dbiterator_origin
};
475
/* Capacity of the 'deletions' array in rbtdb_dbiterator_t. */
#define DELETION_BATCH_MAX 64

/*
 * If 'paused' is ISC_TRUE, then the tree lock is not being held.
 *
 * NOTE(review): 'delete' is presumably the number of entries of
 * 'deletions' currently in use -- confirm in the iterator routines.
 */
typedef struct rbtdb_dbiterator {
	dns_dbiterator_t		common;
	isc_boolean_t			paused;
	isc_boolean_t			new_origin;
	isc_rwlocktype_t		tree_locked;
	isc_result_t			result;
	dns_fixedname_t			name;
	dns_fixedname_t			origin;
	dns_rbtnodechain_t		chain;
	dns_rbtnode_t			*node;
	dns_rbtnode_t			*deletions[DELETION_BATCH_MAX];
	int				delete;
} rbtdb_dbiterator_t;
494
495
/* True when the matching DNS_DBATTR_* bit is set in common.attributes. */
#define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
#define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)

/* Declared early because free_rbtdb_callback() calls it before its definition. */
static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
		       isc_event_t *event);
501
/*%
 * 'init_count' is used to initialize 'newheader->count', which in turn
 * is used to determine where in the cycle rrset-order cyclic starts.
 * We don't lock this as we don't care about simultaneous updates.
 *
 * Note:
 *	Both init_count and header->count can be ISC_UINT32_MAX.
 *	The count on the returned rdataset however can't be as
 *	that indicates that the database does not implement cyclic
 *	processing.
 */
static unsigned int init_count;
514
515 /*
516  * Locking
517  *
518  * If a routine is going to lock more than one lock in this module, then
519  * the locking must be done in the following order:
520  *
521  *      Tree Lock
522  *
523  *      Node Lock       (Only one from the set may be locked at one time by
524  *                       any caller)
525  *
526  *      Database Lock
527  *
528  * Failure to follow this hierarchy can result in deadlock.
529  */
530
531 /*
532  * Deleting Nodes
533  *
534  * Currently there is no deletion of nodes from the database, except when
535  * the database is being destroyed.
536  *
537  * If node deletion is added in the future, then for zone databases the node
538  * for the origin of the zone MUST NOT be deleted.
539  */
540
541
542 /*
543  * DB Routines
544  */
545
546 static void
547 attach(dns_db_t *source, dns_db_t **targetp) {
548         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
549
550         REQUIRE(VALID_RBTDB(rbtdb));
551
552         isc_refcount_increment(&rbtdb->references, NULL);
553
554         *targetp = source;
555 }
556
557 static void
558 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
559         dns_rbtdb_t *rbtdb = event->ev_arg;
560
561         UNUSED(task);
562
563         free_rbtdb(rbtdb, ISC_TRUE, event);
564 }
565
566 /*%
567  * Work out how many nodes can be deleted in the time between two
568  * requests to the nameserver.  Smooth the resulting number and use it
569  * as a estimate for the number of nodes to be deleted in the next
570  * iteration.
571  */
572 static unsigned int
573 adjust_quantum(unsigned int old, isc_time_t *start) {
574         unsigned int pps = dns_pps;     /* packets per second */
575         unsigned int interval;
576         isc_uint64_t usecs;
577         isc_time_t end;
578         unsigned int new;
579
580         if (pps < 100)
581                 pps = 100;
582         isc_time_now(&end);
583
584         interval = 1000000 / pps;       /* interval in usec */
585         if (interval == 0)
586                 interval = 1;
587         usecs = isc_time_microdiff(&end, start);
588         if (usecs == 0) {
589                 /*
590                  * We were unable to measure the amount of time taken.
591                  * Double the nodes deleted next time.
592                  */
593                 old *= 2;
594                 if (old > 1000)
595                         old = 1000;
596                 return (old);
597         }
598         new = old * interval;
599         new /= (unsigned int)usecs;
600         if (new == 0)
601                 new = 1;
602         else if (new > 1000)
603                 new = 1000;
604
605         /* Smooth */
606         new = (new + old * 3) / 4;
607         
608         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
609                       ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
610
611         return (new);
612 }
613                 
/*
 * Destroy the database and release its memory.
 *
 * 'log' selects whether a "done free_rbtdb" debug message is written.
 * 'event' is NULL on the initial call; when the call comes from
 * free_rbtdb_callback() it is the event that triggered the callback and
 * is either re-sent (more work left) or freed here.
 *
 * The tree is destroyed through dns_rbt_destroy2() with 'quantum' as its
 * second argument.  If that returns ISC_R_QUOTA the function posts an
 * event to rbtdb->task and returns, to be resumed later by
 * free_rbtdb_callback().
 */
static void
free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
	unsigned int i;
	isc_ondestroy_t ondest;
	isc_result_t result;
	char buf[DNS_NAME_FORMATSIZE];
	isc_time_t start;

	REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
	REQUIRE(rbtdb->future_version == NULL);

	/* Drop the last reference to the current version and free it. */
	if (rbtdb->current_version != NULL) {
		unsigned int refs;

		isc_refcount_decrement(&rbtdb->current_version->references,
				       &refs);
		INSIST(refs == 0);
		UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
		isc_refcount_destroy(&rbtdb->current_version->references);
		isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
			    sizeof(rbtdb_version_t));
	}
	/*
	 * Initial call only: start with a quantum of 100 when a task is
	 * available to continue the work, otherwise 0.
	 * NOTE(review): 0 presumably means "no limit" to
	 * dns_rbt_destroy2() -- confirm.
	 */
	if (event == NULL)
		rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
 again:
	if (rbtdb->tree != NULL) {
		isc_time_now(&start);
		result = dns_rbt_destroy2(&rbtdb->tree, rbtdb->quantum);
		if (result == ISC_R_QUOTA) {
			INSIST(rbtdb->task != NULL);
			/* Re-tune the batch size from the time just spent. */
			if (rbtdb->quantum != 0)
				rbtdb->quantum = adjust_quantum(rbtdb->quantum,
								&start);
			if (event == NULL)
				event = isc_event_allocate(rbtdb->common.mctx,
							   NULL,
							 DNS_EVENT_FREESTORAGE,
							   free_rbtdb_callback,
							   rbtdb,
							   sizeof(isc_event_t));
			/*
			 * No event could be allocated: keep destroying
			 * synchronously instead of deferring.
			 */
			if (event == NULL)
				goto again;
			isc_task_send(rbtdb->task, &event);
			return;
		}
		INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
	}
	if (event != NULL)
		isc_event_free(&event);
	if (log) {
		if (dns_name_dynamic(&rbtdb->common.origin))
			dns_name_format(&rbtdb->common.origin, buf,
					sizeof(buf));
		else
			strcpy(buf, "<UNKNOWN>");
		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
			      DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
			      "done free_rbtdb(%s)", buf);
	}
	if (dns_name_dynamic(&rbtdb->common.origin))
		dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
	/* Tear down every node lock bucket, then the array itself. */
	for (i = 0; i < rbtdb->node_lock_count; i++) {
		isc_refcount_destroy(&rbtdb->node_locks[i].references);
		NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
	}
	isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
		    rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
	isc_rwlock_destroy(&rbtdb->tree_lock);
	isc_refcount_destroy(&rbtdb->references);
	if (rbtdb->task != NULL)
		isc_task_detach(&rbtdb->task);
	RBTDB_DESTROYLOCK(&rbtdb->lock);
	/* Invalidate both magic numbers before the memory is released. */
	rbtdb->common.magic = 0;
	rbtdb->common.impmagic = 0;
	/* Copy 'ondest' out first: it lives inside the block freed next. */
	ondest = rbtdb->common.ondest;
	isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
	/*
	 * NOTE(review): 'rbtdb' has been released at this point; it is
	 * passed only as a pointer value -- confirm the notify routine
	 * never dereferences it.
	 */
	isc_ondestroy_notify(&ondest, rbtdb);
}
692
693 static inline void
694 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
695         isc_boolean_t want_free = ISC_FALSE;
696         unsigned int i;
697         unsigned int inactive = 0;
698
699         /* XXX check for open versions here */
700
701         if (rbtdb->soanode != NULL)
702                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
703         if (rbtdb->nsnode != NULL)
704                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
705
706         /*
707          * Even though there are no external direct references, there still
708          * may be nodes in use.
709          */
710         for (i = 0; i < rbtdb->node_lock_count; i++) {
711                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
712                 rbtdb->node_locks[i].exiting = ISC_TRUE;
713                 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
714                 if (isc_refcount_current(&rbtdb->node_locks[i].references)
715                     == 0) {
716                         inactive++;
717                 }
718         }
719
720         if (inactive != 0) {
721                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
722                 rbtdb->active -= inactive;
723                 if (rbtdb->active == 0)
724                         want_free = ISC_TRUE;
725                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
726                 if (want_free) {
727                         char buf[DNS_NAME_FORMATSIZE];
728                         if (dns_name_dynamic(&rbtdb->common.origin))
729                                 dns_name_format(&rbtdb->common.origin, buf,
730                                                 sizeof(buf));
731                         else
732                                 strcpy(buf, "<UNKNOWN>");
733                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
734                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
735                                       "calling free_rbtdb(%s)", buf);
736                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
737                 }
738         }
739 }
740
741 static void
742 detach(dns_db_t **dbp) {
743         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
744         unsigned int refs;
745
746         REQUIRE(VALID_RBTDB(rbtdb));
747
748         isc_refcount_decrement(&rbtdb->references, &refs);
749
750         if (refs == 0)
751                 maybe_free_rbtdb(rbtdb);
752
753         *dbp = NULL;
754 }
755
756 static void
757 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
758         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
759         rbtdb_version_t *version;
760         unsigned int refs;
761
762         REQUIRE(VALID_RBTDB(rbtdb));
763
764         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
765         version = rbtdb->current_version;
766         isc_refcount_increment(&version->references, &refs);
767         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
768
769         *versionp = (dns_dbversion_t *)version;
770 }
771
772 static inline rbtdb_version_t *
773 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
774                  unsigned int references, isc_boolean_t writer)
775 {
776         isc_result_t result;
777         rbtdb_version_t *version;
778
779         version = isc_mem_get(mctx, sizeof(*version));
780         if (version == NULL)
781                 return (NULL);
782         version->serial = serial;
783         result = isc_refcount_init(&version->references, references);
784         if (result != ISC_R_SUCCESS) {
785                 isc_mem_put(mctx, version, sizeof(*version));
786                 return (NULL);
787         }
788         version->writer = writer;
789         version->commit_ok = ISC_FALSE;
790         ISC_LIST_INIT(version->changed_list);
791         ISC_LINK_INIT(version, link);
792
793         return (version);
794 }
795
796 static isc_result_t
797 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
798         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
799         rbtdb_version_t *version;
800
801         REQUIRE(VALID_RBTDB(rbtdb));
802         REQUIRE(versionp != NULL && *versionp == NULL);
803         REQUIRE(rbtdb->future_version == NULL);
804
805         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
806         RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
807         version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
808                                    ISC_TRUE);
809         if (version != NULL) {
810                 version->commit_ok = ISC_TRUE;
811                 rbtdb->next_serial++;
812                 rbtdb->future_version = version;
813         }
814         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
815
816         if (version == NULL)
817                 return (ISC_R_NOMEMORY);
818
819         *versionp = version;
820
821         return (ISC_R_SUCCESS);
822 }
823
824 static void
825 attachversion(dns_db_t *db, dns_dbversion_t *source,
826               dns_dbversion_t **targetp)
827 {
828         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
829         rbtdb_version_t *rbtversion = source;
830         unsigned int refs;
831
832         REQUIRE(VALID_RBTDB(rbtdb));
833
834         isc_refcount_increment(&rbtversion->references, &refs);
835         INSIST(refs > 1);
836
837         *targetp = rbtversion;
838 }
839
840 static rbtdb_changed_t *
841 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
842             dns_rbtnode_t *node)
843 {
844         rbtdb_changed_t *changed;
845         unsigned int refs;
846
847         /*
848          * Caller must be holding the node lock if its reference must be
849          * protected by the lock.
850          */
851
852         changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
853
854         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
855
856         REQUIRE(version->writer);
857
858         if (changed != NULL) {
859                 dns_rbtnode_refincrement(node, &refs);
860                 INSIST(refs != 0);
861                 changed->node = node;
862                 changed->dirty = ISC_FALSE;
863                 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
864         } else
865                 version->commit_ok = ISC_FALSE;
866
867         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
868
869         return (changed);
870 }
871
872 static void
873 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
874                  acachectl_t *array)
875 {
876         unsigned int count;
877         unsigned int i;
878         unsigned char *raw;     /* RDATASLAB */
879
880         /*
881          * The caller must be holding the corresponding node lock.
882          */
883
884         if (array == NULL)
885                 return;
886
887         raw = (unsigned char *)header + sizeof(*header);
888         count = raw[0] * 256 + raw[1];
889
890         /*
891          * Sanity check: since an additional cache entry has a reference to
892          * the original DB node (in the callback arg), there should be no
893          * acache entries when the node can be freed. 
894          */
895         for (i = 0; i < count; i++)
896                 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
897
898         isc_mem_put(mctx, array, count * sizeof(acachectl_t));
899 }
900
901 static inline void
902 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
903
904         if (dns_name_dynamic(&(*noqname)->name))
905                 dns_name_free(&(*noqname)->name, mctx);
906         if ((*noqname)->nsec != NULL)
907                 isc_mem_put(mctx, (*noqname)->nsec,
908                             dns_rdataslab_size((*noqname)->nsec, 0));
909         if ((*noqname)->nsecsig != NULL)
910                 isc_mem_put(mctx, (*noqname)->nsecsig,
911                             dns_rdataslab_size((*noqname)->nsecsig, 0));
912         isc_mem_put(mctx, *noqname, sizeof(**noqname));
913         *noqname = NULL;
914 }
915
916 static inline void
917 free_rdataset(isc_mem_t *mctx, rdatasetheader_t *rdataset) {
918         unsigned int size;
919
920         if (rdataset->noqname != NULL)
921                 free_noqname(mctx, &rdataset->noqname);
922
923         free_acachearray(mctx, rdataset, rdataset->additional_auth);
924         free_acachearray(mctx, rdataset, rdataset->additional_glue);
925
926         if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
927                 size = sizeof(*rdataset);
928         else
929                 size = dns_rdataslab_size((unsigned char *)rdataset,
930                                           sizeof(*rdataset));
931         isc_mem_put(mctx, rdataset, size);
932 }
933
934 static inline void
935 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
936         rdatasetheader_t *header, *dcurrent;
937         isc_boolean_t make_dirty = ISC_FALSE;
938
939         /*
940          * Caller must hold the node lock.
941          */
942
943         /*
944          * We set the IGNORE attribute on rdatasets with serial number
945          * 'serial'.  When the reference count goes to zero, these rdatasets
946          * will be cleaned up; until that time, they will be ignored.
947          */
948         for (header = node->data; header != NULL; header = header->next) {
949                 if (header->serial == serial) {
950                         header->attributes |= RDATASET_ATTR_IGNORE;
951                         make_dirty = ISC_TRUE;
952                 }
953                 for (dcurrent = header->down;
954                      dcurrent != NULL;
955                      dcurrent = dcurrent->down) {
956                         if (dcurrent->serial == serial) {
957                                 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
958                                 make_dirty = ISC_TRUE;
959                         }
960                 }
961         }
962         if (make_dirty)
963                 node->dirty = 1;
964 }
965
966 static inline void
967 clean_stale_headers(isc_mem_t *mctx, rdatasetheader_t *top) {
968         rdatasetheader_t *d, *down_next;
969
970         for (d = top->down; d != NULL; d = down_next) {
971                 down_next = d->down;
972                 free_rdataset(mctx, d);
973         }
974         top->down = NULL;
975 }
976
977 static inline void
978 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
979         rdatasetheader_t *current, *top_prev, *top_next;
980         isc_mem_t *mctx = rbtdb->common.mctx;
981
982         /*
983          * Caller must be holding the node lock.
984          */
985
986         top_prev = NULL;
987         for (current = node->data; current != NULL; current = top_next) {
988                 top_next = current->next;
989                 clean_stale_headers(mctx, current);
990                 /*
991                  * If current is nonexistent or stale, we can clean it up.
992                  */
993                 if ((current->attributes &
994                      (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
995                         if (top_prev != NULL)
996                                 top_prev->next = current->next;
997                         else
998                                 node->data = current->next;
999                         free_rdataset(mctx, current);
1000                 } else
1001                         top_prev = current;
1002         }
1003         node->dirty = 0;
1004 }
1005
1006 static inline void
1007 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1008                 rbtdb_serial_t least_serial)
1009 {
1010         rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1011         rdatasetheader_t *top_prev, *top_next;
1012         isc_mem_t *mctx = rbtdb->common.mctx;
1013         isc_boolean_t still_dirty = ISC_FALSE;
1014
1015         /*
1016          * Caller must be holding the node lock.
1017          */
1018         REQUIRE(least_serial != 0);
1019
1020         top_prev = NULL;
1021         for (current = node->data; current != NULL; current = top_next) {
1022                 top_next = current->next;
1023
1024                 /*
1025                  * First, we clean up any instances of multiple rdatasets
1026                  * with the same serial number, or that have the IGNORE
1027                  * attribute.
1028                  */
1029                 dparent = current;
1030                 for (dcurrent = current->down;
1031                      dcurrent != NULL;
1032                      dcurrent = down_next) {
1033                         down_next = dcurrent->down;
1034                         INSIST(dcurrent->serial <= dparent->serial);
1035                         if (dcurrent->serial == dparent->serial ||
1036                             IGNORE(dcurrent)) {
1037                                 if (down_next != NULL)
1038                                         down_next->next = dparent;
1039                                 dparent->down = down_next;
1040                                 free_rdataset(mctx, dcurrent);
1041                         } else
1042                                 dparent = dcurrent;
1043                 }
1044
1045                 /*
1046                  * We've now eliminated all IGNORE datasets with the possible
1047                  * exception of current, which we now check.
1048                  */
1049                 if (IGNORE(current)) {
1050                         down_next = current->down;
1051                         if (down_next == NULL) {
1052                                 if (top_prev != NULL)
1053                                         top_prev->next = current->next;
1054                                 else
1055                                         node->data = current->next;
1056                                 free_rdataset(mctx, current);
1057                                 /*
1058                                  * current no longer exists, so we can
1059                                  * just continue with the loop.
1060                                  */
1061                                 continue;
1062                         } else {
1063                                 /*
1064                                  * Pull up current->down, making it the new
1065                                  * current.
1066                                  */
1067                                 if (top_prev != NULL)
1068                                         top_prev->next = down_next;
1069                                 else
1070                                         node->data = down_next;
1071                                 down_next->next = top_next;
1072                                 free_rdataset(mctx, current);
1073                                 current = down_next;
1074                         }
1075                 }
1076
1077                 /*
1078                  * We now try to find the first down node less than the
1079                  * least serial.
1080                  */
1081                 dparent = current;
1082                 for (dcurrent = current->down;
1083                      dcurrent != NULL;
1084                      dcurrent = down_next) {
1085                         down_next = dcurrent->down;
1086                         if (dcurrent->serial < least_serial)
1087                                 break;
1088                         dparent = dcurrent;
1089                 }
1090
1091                 /*
1092                  * If there is a such an rdataset, delete it and any older
1093                  * versions.
1094                  */
1095                 if (dcurrent != NULL) {
1096                         do {
1097                                 down_next = dcurrent->down;
1098                                 INSIST(dcurrent->serial <= least_serial);
1099                                 free_rdataset(mctx, dcurrent);
1100                                 dcurrent = down_next;
1101                         } while (dcurrent != NULL);
1102                         dparent->down = NULL;
1103                 }
1104
1105                 /*
1106                  * Note.  The serial number of 'current' might be less than
1107                  * least_serial too, but we cannot delete it because it is
1108                  * the most recent version, unless it is a NONEXISTENT
1109                  * rdataset.
1110                  */
1111                 if (current->down != NULL) {
1112                         still_dirty = ISC_TRUE;
1113                         top_prev = current;
1114                 } else {
1115                         /*
1116                          * If this is a NONEXISTENT rdataset, we can delete it.
1117                          */
1118                         if (NONEXISTENT(current)) {
1119                                 if (top_prev != NULL)
1120                                         top_prev->next = current->next;
1121                                 else
1122                                         node->data = current->next;
1123                                 free_rdataset(mctx, current);
1124                         } else
1125                                 top_prev = current;
1126                 }
1127         }
1128         if (!still_dirty)
1129                 node->dirty = 0;
1130 }
1131
1132 /*
1133  * Caller must be holding the node lock if its reference must be protected
1134  * by the lock.
1135  */
1136 static inline void
1137 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1138         unsigned int lockrefs, noderefs;
1139         isc_refcount_t *lockref;
1140
1141         dns_rbtnode_refincrement0(node, &noderefs);
1142         if (noderefs == 1) {    /* this is the first reference to the node */
1143                 lockref = &rbtdb->node_locks[node->locknum].references;
1144                 isc_refcount_increment0(lockref, &lockrefs);
1145                 INSIST(lockrefs != 0);
1146         }
1147         INSIST(noderefs != 0);
1148 }
1149
1150 /*
1151  * Caller must be holding the node lock; either the "strong", read or write
1152  * lock.  Note that the lock must be held even when node references are
1153  * atomically modified; in that case the decrement operation itself does not
1154  * have to be protected, but we must avoid a race condition where multiple
1155  * threads are decreasing the reference to zero simultaneously and at least
1156  * one of them is going to free the node.
1157  * This function returns ISC_TRUE if and only if the node reference decreases
1158  * to zero.
1159  */
1160 static isc_boolean_t
1161 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1162                     rbtdb_serial_t least_serial,
1163                     isc_rwlocktype_t nlock, isc_rwlocktype_t tlock)
1164 {
1165         isc_result_t result;
1166         isc_boolean_t write_locked;
1167         rbtdb_nodelock_t *nodelock;
1168         unsigned int refs, nrefs;
1169
1170         nodelock = &rbtdb->node_locks[node->locknum];
1171
1172         /* Handle easy and typical case first. */
1173         if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1174                 dns_rbtnode_refdecrement(node, &nrefs);
1175                 INSIST((int)nrefs >= 0);
1176                 if (nrefs == 0) {
1177                         isc_refcount_decrement(&nodelock->references, &refs);
1178                         INSIST((int)refs >= 0);
1179                 }
1180                 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1181         }
1182
1183         /* Upgrade the lock? */
1184         if (nlock == isc_rwlocktype_read) {
1185                 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1186                 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1187         }
1188         dns_rbtnode_refdecrement(node, &nrefs);
1189         INSIST((int)nrefs >= 0);
1190         if (nrefs > 0) {
1191                 /* Restore the lock? */
1192                 if (nlock == isc_rwlocktype_read)
1193                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1194                 return (ISC_FALSE);
1195         }
1196
1197         if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1198                 if (IS_CACHE(rbtdb))
1199                         clean_cache_node(rbtdb, node);
1200                 else {
1201                         if (least_serial == 0) {
1202                                 /*
1203                                  * Caller doesn't know the least serial.
1204                                  * Get it.
1205                                  */
1206                                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1207                                 least_serial = rbtdb->least_serial;
1208                                 RBTDB_UNLOCK(&rbtdb->lock,
1209                                              isc_rwlocktype_read);
1210                         }
1211                         clean_zone_node(rbtdb, node, least_serial);
1212                 }
1213         }
1214
1215         isc_refcount_decrement(&nodelock->references, &refs);
1216         INSIST((int)refs >= 0);
1217
1218         /*
1219          * XXXDCL should this only be done for cache zones?
1220          */
1221         if (node->data != NULL || node->down != NULL) {
1222                 /* Restore the lock? */
1223                 if (nlock == isc_rwlocktype_read)
1224                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1225                 return (ISC_TRUE);
1226         }
1227
1228         /*
1229          * XXXDCL need to add a deferred delete method for ISC_R_LOCKBUSY.
1230          */
1231         if (tlock != isc_rwlocktype_write) {
1232                 /*
1233                  * Locking hierarchy notwithstanding, we don't need to free
1234                  * the node lock before acquiring the tree write lock because
1235                  * we only do a trylock.
1236                  */
1237                 if (tlock == isc_rwlocktype_read)
1238                         result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1239                 else
1240                         result = isc_rwlock_trylock(&rbtdb->tree_lock,
1241                                                     isc_rwlocktype_write);
1242                 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1243                               result == ISC_R_LOCKBUSY);
1244  
1245                 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1246         } else
1247                 write_locked = ISC_TRUE;
1248
1249         if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1250                 /*
1251                  * We can now delete the node if the reference counter is
1252                  * zero.  This should be typically the case, but a different
1253                  * thread may still gain a (new) reference just before the
1254                  * current thread locks the tree (e.g., in findnode()).
1255                  */
1256
1257                 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1258                         char printname[DNS_NAME_FORMATSIZE];
1259
1260                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1261                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1262                                       "decrement_reference: "
1263                                       "delete from rbt: %p %s",
1264                                       node,
1265                                       dns_rbt_formatnodename(node, printname,
1266                                                            sizeof(printname)));
1267                 }
1268
1269                 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1270                 if (result != ISC_R_SUCCESS)
1271                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1272                                       DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1273                                       "decrement_reference: "
1274                                       "dns_rbt_deletenode: %s",
1275                                       isc_result_totext(result));
1276         }
1277
1278         /* Restore the lock? */
1279         if (nlock == isc_rwlocktype_read)
1280                 NODE_WEAKDOWNGRADE(&nodelock->lock);
1281
1282         /*
1283          * Relock a read lock, or unlock the write lock if no lock was held.
1284          */
1285         if (tlock == isc_rwlocktype_none)
1286                 if (write_locked)
1287                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1288
1289         if (tlock == isc_rwlocktype_read)
1290                 if (write_locked)
1291                         isc_rwlock_downgrade(&rbtdb->tree_lock);
1292
1293         return (ISC_TRUE);
1294 }
1295
1296 static inline void
1297 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1298                    rbtdb_changedlist_t *cleanup_list)
1299 {
1300         /*
1301          * Caller must be holding the database lock.
1302          */
1303
1304         rbtdb->least_serial = version->serial;
1305         *cleanup_list = version->changed_list;
1306         ISC_LIST_INIT(version->changed_list);
1307 }
1308
1309 static inline void
1310 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1311         rbtdb_changed_t *changed, *next_changed;
1312
1313         /*
1314          * If the changed record is dirty, then
1315          * an update created multiple versions of
1316          * a given rdataset.  We keep this list
1317          * until we're the least open version, at
1318          * which point it's safe to get rid of any
1319          * older versions.
1320          *
1321          * If the changed record isn't dirty, then
1322          * we don't need it anymore since we're
1323          * committing and not rolling back.
1324          *
1325          * The caller must be holding the database lock.
1326          */
1327         for (changed = HEAD(version->changed_list);
1328              changed != NULL;
1329              changed = next_changed) {
1330                 next_changed = NEXT(changed, link);
1331                 if (!changed->dirty) {
1332                         UNLINK(version->changed_list,
1333                                changed, link);
1334                         APPEND(*cleanup_list,
1335                                changed, link);
1336                 }
1337         }
1338 }
1339
1340 static isc_boolean_t
1341 iszonesecure(dns_db_t *db, dns_dbnode_t *origin) {
1342         dns_rdataset_t keyset;
1343         dns_rdataset_t nsecset, signsecset;
1344         isc_boolean_t haszonekey = ISC_FALSE;
1345         isc_boolean_t hasnsec = ISC_FALSE;
1346         isc_result_t result;
1347
1348         dns_rdataset_init(&keyset);
1349         result = dns_db_findrdataset(db, origin, NULL, dns_rdatatype_dnskey, 0,
1350                                      0, &keyset, NULL);
1351         if (result == ISC_R_SUCCESS) {
1352                 dns_rdata_t keyrdata = DNS_RDATA_INIT;
1353                 result = dns_rdataset_first(&keyset);
1354                 while (result == ISC_R_SUCCESS) {
1355                         dns_rdataset_current(&keyset, &keyrdata);
1356                         if (dns_zonekey_iszonekey(&keyrdata)) {
1357                                 haszonekey = ISC_TRUE;
1358                                 break;
1359                         }
1360                         result = dns_rdataset_next(&keyset);
1361                 }
1362                 dns_rdataset_disassociate(&keyset);
1363         }
1364         if (!haszonekey)
1365                 return (ISC_FALSE);
1366
1367         dns_rdataset_init(&nsecset);
1368         dns_rdataset_init(&signsecset);
1369         result = dns_db_findrdataset(db, origin, NULL, dns_rdatatype_nsec, 0,
1370                                      0, &nsecset, &signsecset);
1371         if (result == ISC_R_SUCCESS) {
1372                 if (dns_rdataset_isassociated(&signsecset)) {
1373                         hasnsec = ISC_TRUE;
1374                         dns_rdataset_disassociate(&signsecset);
1375                 }
1376                 dns_rdataset_disassociate(&nsecset);
1377         }
1378         return (hasnsec);
1379 }
1380
1381 static void
1382 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
1383         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1384         rbtdb_version_t *version, *cleanup_version, *least_greater;
1385         isc_boolean_t rollback = ISC_FALSE;
1386         rbtdb_changedlist_t cleanup_list;
1387         rbtdb_changed_t *changed, *next_changed;
1388         rbtdb_serial_t serial, least_serial;
1389         dns_rbtnode_t *rbtnode;
1390         unsigned int refs;
1391
1392         REQUIRE(VALID_RBTDB(rbtdb));
1393         version = (rbtdb_version_t *)*versionp;
1394
1395         cleanup_version = NULL;
1396         ISC_LIST_INIT(cleanup_list);
1397
1398         isc_refcount_decrement(&version->references, &refs);
1399         if (refs > 0) {         /* typical and easy case first */
1400                 if (commit) {
1401                         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1402                         INSIST(!version->writer);
1403                         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1404                 }
1405                 goto end;
1406         }
1407
1408         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1409         serial = version->serial;
1410         if (version->writer) {
1411                 if (commit) {
1412                         unsigned cur_ref;
1413                         rbtdb_version_t *cur_version;
1414
1415                         INSIST(version->commit_ok);
1416                         INSIST(version == rbtdb->future_version);
1417                         /*
1418                          * The current version is going to be replaced.
1419                          * Release the (likely last) reference to it from the
1420                          * DB itself and unlink it from the open list.
1421                          */
1422                         cur_version = rbtdb->current_version;
1423                         isc_refcount_decrement(&cur_version->references,
1424                                                &cur_ref);
1425                         if (cur_ref == 0) {
1426                                 if (cur_version->serial == rbtdb->least_serial)
1427                                         INSIST(EMPTY(cur_version->changed_list));
1428                                 UNLINK(rbtdb->open_versions,
1429                                        cur_version, link);
1430                         }
1431                         if (EMPTY(rbtdb->open_versions)) {
1432                                 /*
1433                                  * We're going to become the least open
1434                                  * version.
1435                                  */
1436                                 make_least_version(rbtdb, version,
1437                                                    &cleanup_list);
1438                         } else {
1439                                 /*
1440                                  * Some other open version is the
1441                                  * least version.  We can't cleanup
1442                                  * records that were changed in this
1443                                  * version because the older versions
1444                                  * may still be in use by an open
1445                                  * version.
1446                                  *
1447                                  * We can, however, discard the
1448                                  * changed records for things that
1449                                  * we've added that didn't exist in
1450                                  * prior versions.
1451                                  */
1452                                 cleanup_nondirty(version, &cleanup_list);
1453                         }
1454                         /*
1455                          * If the (soon to be former) current version
1456                          * isn't being used by anyone, we can clean
1457                          * it up.
1458                          */
1459                         if (cur_ref == 0) {
1460                                 cleanup_version = cur_version;
1461                                 APPENDLIST(version->changed_list,
1462                                            cleanup_version->changed_list,
1463                                            link);
1464                         }
1465                         /*
1466                          * Become the current version.
1467                          */
1468                         version->writer = ISC_FALSE;
1469                         rbtdb->current_version = version;
1470                         rbtdb->current_serial = version->serial;
1471                         rbtdb->future_version = NULL;
1472
1473                         /*
1474                          * Keep the current version in the open list, and
1475                          * gain a reference for the DB itself (see the DB
1476                          * creation function below).  This must be the only
1477                          * case where we need to increment the counter from
1478                          * zero and need to use isc_refcount_increment0().
1479                          */
1480                         isc_refcount_increment0(&version->references,
1481                                                 &cur_ref);
1482                         INSIST(cur_ref == 1);
1483                         PREPEND(rbtdb->open_versions,
1484                                 rbtdb->current_version, link);
1485                 } else {
1486                         /*
1487                          * We're rolling back this transaction.
1488                          */
1489                         cleanup_list = version->changed_list;
1490                         ISC_LIST_INIT(version->changed_list);
1491                         rollback = ISC_TRUE;
1492                         cleanup_version = version;
1493                         rbtdb->future_version = NULL;
1494                 }
1495         } else {
1496                 if (version != rbtdb->current_version) {
1497                         /*
1498                          * There are no external or internal references
1499                          * to this version and it can be cleaned up.
1500                          */
1501                         cleanup_version = version;
1502
1503                         /*
1504                          * Find the version with the least serial
1505                          * number greater than ours.
1506                          */
1507                         least_greater = PREV(version, link);
1508                         if (least_greater == NULL)
1509                                 least_greater = rbtdb->current_version;
1510
1511                         INSIST(version->serial < least_greater->serial);
1512                         /*
1513                          * Is this the least open version?
1514                          */
1515                         if (version->serial == rbtdb->least_serial) {
1516                                 /*
1517                                  * Yes.  Install the new least open
1518                                  * version.
1519                                  */
1520                                 make_least_version(rbtdb,
1521                                                    least_greater,
1522                                                    &cleanup_list);
1523                         } else {
1524                                 /*
1525                                  * Add any unexecuted cleanups to
1526                                  * those of the least greater version.
1527                                  */
1528                                 APPENDLIST(least_greater->changed_list,
1529                                            version->changed_list,
1530                                            link);
1531                         }
1532                 } else if (version->serial == rbtdb->least_serial)
1533                         INSIST(EMPTY(version->changed_list));
1534                 UNLINK(rbtdb->open_versions, version, link);
1535         }
1536         least_serial = rbtdb->least_serial;
1537         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1538
1539         /*
1540          * Update the zone's secure status.
1541          */
1542         if (version->writer && commit && !IS_CACHE(rbtdb))
1543                 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
1544
1545         if (cleanup_version != NULL) {
1546                 INSIST(EMPTY(cleanup_version->changed_list));
1547                 isc_mem_put(rbtdb->common.mctx, cleanup_version,
1548                             sizeof(*cleanup_version));
1549         }
1550
1551         if (!EMPTY(cleanup_list)) {
1552                 for (changed = HEAD(cleanup_list);
1553                      changed != NULL;
1554                      changed = next_changed) {
1555                         nodelock_t *lock;
1556
1557                         next_changed = NEXT(changed, link);
1558                         rbtnode = changed->node;
1559                         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
1560
1561                         NODE_LOCK(lock, isc_rwlocktype_write);
1562                         if (rollback)
1563                                 rollback_node(rbtnode, serial);
1564                         decrement_reference(rbtdb, rbtnode, least_serial,
1565                                             isc_rwlocktype_write,
1566                                             isc_rwlocktype_none);
1567                         NODE_UNLOCK(lock, isc_rwlocktype_write);
1568
1569                         isc_mem_put(rbtdb->common.mctx, changed,
1570                                     sizeof(*changed));
1571                 }
1572         }
1573
1574   end:
1575         *versionp = NULL;
1576 }
1577
1578 /*
1579  * Add the necessary magic for the wildcard name 'name'
1580  * to be found in 'rbtdb'.
1581  *
1582  * In order for wildcard matching to work correctly in
1583  * zone_find(), we must ensure that a node for the wildcarding
1584  * level exists in the database, and has its 'find_callback'
1585  * and 'wild' bits set.
1586  *
1587  * E.g. if the wildcard name is "*.sub.example." then we
1588  * must ensure that "sub.example." exists and is marked as
1589  * a wildcard level.
      *
      * 'name' must have at least two labels (enforced by INSIST).  The
      * wildcarding level is 'name' with its first label removed; it is
      * added to rbtdb->tree with dns_rbt_addnode(), and ISC_R_EXISTS is
      * treated the same as ISC_R_SUCCESS.
      *
      * Returns ISC_R_SUCCESS once the node's bits are set, or the
      * dns_rbt_addnode() result if it is neither ISC_R_SUCCESS nor
      * ISC_R_EXISTS.
      *
      * NOTE(review): no lock is taken here.  findnode() calls this while
      * holding the tree write lock; confirm the same holds for any other
      * caller.
1590  */
1591 static isc_result_t
1592 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
1593         isc_result_t result;
1594         dns_name_t foundname;
1595         dns_offsets_t offsets;
1596         unsigned int n;
1597         dns_rbtnode_t *node = NULL;
1598
1599         dns_name_init(&foundname, offsets);
1600         n = dns_name_countlabels(name);
1601         INSIST(n >= 2);
             /*
              * Skip label 0 and keep the remaining n - 1 labels: this is
              * the name of the wildcarding level.
              */
1602         n--;
1603         dns_name_getlabelsequence(name, 1, n, &foundname);
1604         result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
             /* An already existing node is fine; we only need to mark it. */
1605         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
1606                 return (result);
1607         node->find_callback = 1;
1608         node->wild = 1;
1609         return (ISC_R_SUCCESS);
1610 }
1611
/*
 * Examine the proper suffixes of 'name' that lie below the database
 * origin and, for each one that dns_name_iswildcard() reports as a
 * wildcard name, make sure both its wildcarding level (via
 * add_wildcard_magic()) and the wildcard name itself exist as nodes in
 * rbtdb->tree.
 *
 * With n labels in 'name' and l labels in rbtdb->common.origin, the
 * suffixes examined are those made of the last i labels of 'name' for
 * i = l + 1 .. n - 1.  'name' itself (i == n) is not examined here.
 *
 * Returns ISC_R_SUCCESS, or the first failure from add_wildcard_magic()
 * or dns_rbt_addnode() (ISC_R_EXISTS from the latter is not a failure).
 */
1612 static isc_result_t
1613 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
1614         isc_result_t result;
1615         dns_name_t foundname;
1616         dns_offsets_t offsets;
1617         unsigned int n, l, i;
1618
1619         dns_name_init(&foundname, offsets);
1620         n = dns_name_countlabels(name);
1621         l = dns_name_countlabels(&rbtdb->common.origin);
             /* Start with the suffix one label longer than the origin. */
1622         i = l + 1;
1623         while (i < n) {
1624                 dns_rbtnode_t *node = NULL;     /* dummy */
                     /* foundname := the last i labels of 'name'. */
1625                 dns_name_getlabelsequence(name, n - i, i, &foundname);
1626                 if (dns_name_iswildcard(&foundname)) {
1627                         result = add_wildcard_magic(rbtdb, &foundname);
1628                         if (result != ISC_R_SUCCESS)
1629                                 return (result);
                             /*
                              * Also create the wildcard node itself; the
                              * returned node pointer is not used.
                              */
1630                         result = dns_rbt_addnode(rbtdb->tree, &foundname,
1631                                                  &node);
1632                         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
1633                                 return (result);
1634                 }
1635                 i++;
1636         }
1637         return (ISC_R_SUCCESS);
1638 }
1639
/*
 * Find the node for 'name' in rbtdb->tree and return it, with a new
 * reference, through '*nodep'.  If the lookup does not succeed and
 * 'create' is true, the node is added.
 *
 * The lookup runs under the tree read lock.  To create a node the read
 * lock is released and the write lock is acquired; because the tree is
 * unlocked in between, dns_rbt_addnode() may report ISC_R_EXISTS, which
 * is accepted and the existing node is used.
 *
 * For a newly added node this also assigns its lock bucket ('locknum'),
 * calls add_empty_wildcards(), and, if 'name' is itself a wildcard
 * name, calls add_wildcard_magic().
 *
 * Returns:
 *      ISC_R_SUCCESS           '*nodep' is set and holds a reference.
 *      ISC_R_NOTFOUND          'create' was false and the lookup gave
 *                              DNS_R_PARTIALMATCH.
 *      other                   the lookup result when 'create' is false,
 *                              or a failure from dns_rbt_addnode() or
 *                              add_wildcard_magic().
 * On any non-success return '*nodep' is left untouched.
 */
1640 static isc_result_t
1641 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
1642          dns_dbnode_t **nodep)
1643 {
1644         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1645         dns_rbtnode_t *node = NULL;
1646         dns_name_t nodename;
1647         isc_result_t result;
1648         isc_rwlocktype_t locktype = isc_rwlocktype_read;
1649
1650         REQUIRE(VALID_RBTDB(rbtdb));
1651
1652         dns_name_init(&nodename, NULL);
1653         RWLOCK(&rbtdb->tree_lock, locktype);
1654         result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
1655                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
1656         if (result != ISC_R_SUCCESS) {
1657                 RWUNLOCK(&rbtdb->tree_lock, locktype);
1658                 if (!create) {
                             /*
                              * A partial match is reported to the caller
                              * as "not found".
                              */
1659                         if (result == DNS_R_PARTIALMATCH)
1660                                 result = ISC_R_NOTFOUND;
1661                         return (result);
1662                 }
1663                 /*
1664                  * It would be nice to try to upgrade the lock instead of
1665                  * unlocking then relocking.
1666                  */
1667                 locktype = isc_rwlocktype_write;
1668                 RWLOCK(&rbtdb->tree_lock, locktype);
1669                 node = NULL;
1670                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
1671                 if (result == ISC_R_SUCCESS) {
                             /*
                              * Brand new node: pick the node lock bucket it
                              * will use from here on.
                              */
1672                         dns_rbt_namefromnode(node, &nodename);
1673 #ifdef DNS_RBT_USEHASH
1674                         node->locknum = node->hashval % rbtdb->node_lock_count;
1675 #else
1676                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
1677                                 rbtdb->node_lock_count;
1678 #endif
                             /*
                              * NOTE(review): the result of
                              * add_empty_wildcards() is discarded, so a
                              * failure inside it is not reported, unlike
                              * the add_wildcard_magic() call below.
                              * Confirm whether best-effort is intended.
                              */
1679                         add_empty_wildcards(rbtdb, name);
1680
1681                         if (dns_name_iswildcard(name)) {
1682                                 result = add_wildcard_magic(rbtdb, name);
1683                                 if (result != ISC_R_SUCCESS) {
1684                                         RWUNLOCK(&rbtdb->tree_lock, locktype);
1685                                         return (result);
1686                                 }
1687                         }
1688                 } else if (result != ISC_R_EXISTS) {
1689                         RWUNLOCK(&rbtdb->tree_lock, locktype);
1690                         return (result);
1691                 }
1692         }
             /*
              * Take the caller's reference while the tree lock (read or
              * write, per 'locktype') is still held.
              */
1693         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1694         new_reference(rbtdb, node);
1695         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1696         RWUNLOCK(&rbtdb->tree_lock, locktype);
1697
1698         *nodep = (dns_dbnode_t *)node;
1699
1700         return (ISC_R_SUCCESS);
1701 }
1702
/*
 * Node callback used during a zone search to detect a zone cut at
 * 'node'.  'arg' is the rbtdb_search_t for the search in progress and
 * 'name' is the name that is copied into search->zonecut_name when the
 * search is allowed to continue below the cut.
 * (Presumably invoked by the tree lookup for nodes whose
 * 'find_callback' bit is set -- confirm against the caller.)
 *
 * Under the node's read lock this looks for an NS, DNAME or
 * RBTDB_RDATATYPE_SIGDNAME rdataset that is visible in the search's
 * version (serial <= search->serial, not IGNOREd, not NONEXISTENT).
 * DNAME takes precedence over NS; NS is only considered away from the
 * origin node, or anywhere in a stub database.
 *
 * If a cut is found, a reference is taken on 'node' and the cut is
 * recorded in 'search' (zonecut, zonecut_rdataset, zonecut_sigrdataset,
 * need_cleanup), and search->wild is cleared.  If no cut is found and
 * the node has its 'wild' bit set, search->wild is set unless
 * DNS_DBFIND_NOWILD was requested.
 *
 * Returns DNS_R_PARTIALMATCH when a cut is found and DNS_DBFIND_GLUEOK
 * is not set; DNS_R_CONTINUE in every other case, including when a cut
 * had already been recorded by an earlier call.
 */
1703 static isc_result_t
1704 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
1705         rbtdb_search_t *search = arg;
1706         rdatasetheader_t *header, *header_next;
1707         rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
1708         rdatasetheader_t *found;
1709         isc_result_t result;
1710         dns_rbtnode_t *onode;
1711
1712         /*
1713          * We only want to remember the topmost zone cut, since it's the one
1714          * that counts, so we'll just continue if we've already found a
1715          * zonecut.
1716          */
1717         if (search->zonecut != NULL)
1718                 return (DNS_R_CONTINUE);
1719
1720         found = NULL;
1721         result = DNS_R_CONTINUE;
1722         onode = search->rbtdb->origin_node;
1723
1724         NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
1725                   isc_rwlocktype_read);
1726
1727         /*
1728          * Look for an NS or DNAME rdataset active in our version.
1729          */
1730         ns_header = NULL;
1731         dname_header = NULL;
1732         sigdname_header = NULL;
1733         for (header = node->data; header != NULL; header = header_next) {
                     /*
                      * Save the sibling link first: 'header' is moved down
                      * the version chain (and may become NULL) below.
                      */
1734                 header_next = header->next;
1735                 if (header->type == dns_rdatatype_ns ||
1736                     header->type == dns_rdatatype_dname ||
1737                     header->type == RBTDB_RDATATYPE_SIGDNAME) {
                             /*
                              * Follow 'down' to the first header visible
                              * to this search's serial.
                              */
1738                         do {
1739                                 if (header->serial <= search->serial &&
1740                                     !IGNORE(header)) {
1741                                         /*
1742                                          * Is this a "this rdataset doesn't
1743                                          * exist" record?
1744                                          */
1745                                         if (NONEXISTENT(header))
1746                                                 header = NULL;
1747                                         break;
1748                                 } else
1749                                         header = header->down;
1750                         } while (header != NULL);
1751                         if (header != NULL) {
1752                                 if (header->type == dns_rdatatype_dname)
1753                                         dname_header = header;
1754                                 else if (header->type == 
1755                                            RBTDB_RDATATYPE_SIGDNAME)
1756                                         sigdname_header = header;
1757                                 else if (node != onode ||
1758                                          IS_STUB(search->rbtdb)) {
1759                                         /*
1760                                          * We've found an NS rdataset that
1761                                          * isn't at the origin node.  We check
1762                                          * that they're not at the origin node,
1763                                          * because otherwise we'd erroneously
1764                                          * treat the zone top as if it were
1765                                          * a delegation.
1766                                          */
1767                                         ns_header = header;
1768                                 }
1769                         }
1770                 }
1771         }
1772
1773         /*
1774          * Did we find anything?
1775          */
1776         if (dname_header != NULL) {
1777                 /*
1778                  * Note that DNAME has precedence over NS if both exist.
1779                  */
1780                 found = dname_header;
1781                 search->zonecut_sigrdataset = sigdname_header;
1782         } else if (ns_header != NULL) {
1783                 found = ns_header;
1784                 search->zonecut_sigrdataset = NULL;
1785         }
1786
1787         if (found != NULL) {
1788                 /*
1789                  * We increment the reference count on node to ensure that
1790                  * search->zonecut_rdataset will still be valid later.
1791                  */
1792                 new_reference(search->rbtdb, node);
1793                 search->zonecut = node;
1794                 search->zonecut_rdataset = found;
1795                 search->need_cleanup = ISC_TRUE;
1796                 /*
1797                  * Since we've found a zonecut, anything beneath it is
1798                  * glue and is not subject to wildcard matching, so we
1799                  * may clear search->wild.
1800                  */
1801                 search->wild = ISC_FALSE;
1802                 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
1803                         /*
1804                          * If the caller does not want to find glue, then
1805                          * this is the best answer and the search should
1806                          * stop now.
1807                          */
1808                         result = DNS_R_PARTIALMATCH;
1809                 } else {
1810                         dns_name_t *zcname;
1811
1812                         /*
1813                          * The search will continue beneath the zone cut.
1814                          * This may or may not be the best match.  In case it
1815                          * is, we need to remember the node name.
1816                          */
1817                         zcname = dns_fixedname_name(&search->zonecut_name);
1818                         RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
1819                                       ISC_R_SUCCESS);
1820                         search->copy_name = ISC_TRUE;
1821                 }
1822         } else {
1823                 /*
1824                  * There is no zonecut at this node which is active in this
1825                  * version.
1826                  *
1827                  * If this is a "wild" node and the caller hasn't disabled
1828                  * wildcard matching, remember that we've seen a wild node
1829                  * in case we need to go searching for wildcard matches
1830                  * later on.
1831                  */
1832                 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
1833                         search->wild = ISC_TRUE;
1834         }
1835
1836         NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
1837                     isc_rwlocktype_read);
1838
1839         return (result);
1840 }
1841
/*
 * Associate 'rdataset' with the data described by 'header' at 'node'.
 *
 * Does nothing if 'rdataset' is NULL.  Otherwise 'rdataset' must be
 * disassociated (methods == NULL, enforced by INSIST).  A reference is
 * taken on 'node', and the rdataset's class, type, covered type, trust
 * and private fields are filled in.  The TTL is set to header->ttl
 * minus 'now'.  The raw rdata (private3) is taken to start immediately
 * after the header structure in memory.
 *
 * The rdataset's 'count' is the header's counter value before it is
 * post-incremented here; a value of ISC_UINT32_MAX is replaced by 0.
 */
1842 static inline void
1843 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1844               rdatasetheader_t *header, isc_stdtime_t now,
1845               dns_rdataset_t *rdataset)
1846 {
1847         unsigned char *raw;     /* RDATASLAB */
1848
1849         /*
1850          * Caller must be holding the node reader lock.
1851          * XXXJT: technically, we need a writer lock, since we'll increment
1852          * the header count below.  However, since the actual counter value
1853          * doesn't matter, we prioritize performance here.  (We may want to
1854          * use atomic increment when available).
1855          */
1856
1857         if (rdataset == NULL)
1858                 return;
1859
1860         new_reference(rbtdb, node);
1861
1862         INSIST(rdataset->methods == NULL);      /* We must be disassociated. */
1863
1864         rdataset->methods = &rdataset_methods;
1865         rdataset->rdclass = rbtdb->common.rdclass;
             /*
              * header->type packs two values, split apart by the
              * RBTDB_RDATATYPE_BASE/EXT macros into type and covers.
              */
1866         rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
1867         rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
1868         rdataset->ttl = header->ttl - now;
1869         rdataset->trust = header->trust;
1870         if (NXDOMAIN(header))
1871                 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
1872         rdataset->private1 = rbtdb;
1873         rdataset->private2 = node;
1874         raw = (unsigned char *)header + sizeof(*header);
1875         rdataset->private3 = raw;
             /*
              * Unsynchronized update of the shared header counter; see
              * the XXXJT note above.
              */
1876         rdataset->count = header->count++;
1877         if (rdataset->count == ISC_UINT32_MAX)
1878                 rdataset->count = 0;
1879
1880         /*
1881          * Reset iterator state.
1882          */
1883         rdataset->privateuint4 = 0;
1884         rdataset->private5 = NULL;
1885
1886         /*
1887          * Add noqname proof.
1888          */
1889         rdataset->private6 = header->noqname;
1890         if (rdataset->private6 != NULL)
1891                 rdataset->attributes |=  DNS_RDATASETATTR_NOQNAME;
1892 }
1893
/*
 * Hand the zone cut recorded in 'search' back to the caller.
 *
 * 'search->zonecut' and 'search->zonecut_rdataset' are dereferenced
 * unconditionally, so a zone cut must already have been recorded.
 *
 * Each output is optional:
 *      foundname       receives search->zonecut_name, but only when
 *                      search->copy_name is set.
 *      nodep           receives the zone cut node; the reference held
 *                      by 'search' is transferred to the caller, so
 *                      search->need_cleanup is cleared.
 *      rdataset        is bound to the zone cut rdataset.
 *      sigrdataset     is bound to search->zonecut_sigrdataset if both
 *                      are non-NULL; note this is only attempted when
 *                      'rdataset' is also non-NULL.
 *
 * Returns DNS_R_DNAME if the zone cut rdataset has type DNAME,
 * DNS_R_DELEGATION otherwise, or the dns_name_copy() failure if
 * 'foundname' could not be set (in which case nothing else was done).
 */
1894 static inline isc_result_t
1895 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
1896                  dns_name_t *foundname, dns_rdataset_t *rdataset,
1897                  dns_rdataset_t *sigrdataset)
1898 {
1899         isc_result_t result;
1900         dns_name_t *zcname;
1901         rbtdb_rdatatype_t type;
1902         dns_rbtnode_t *node;
1903
1904         /*
1905          * The caller MUST NOT be holding any node locks.
1906          */
1907
1908         node = search->zonecut;
1909         type = search->zonecut_rdataset->type;
1910
1911         /*
1912          * If we have to set foundname, we do it before anything else.
1913          * If we were to set foundname after we had set nodep or bound the
1914          * rdataset, then we'd have to undo that work if dns_name_copy()
1915          * failed.  By setting foundname first, there's nothing to undo if
1916          * we have trouble.
1917          */
1918         if (foundname != NULL && search->copy_name) {
1919                 zcname = dns_fixedname_name(&search->zonecut_name);
1920                 result = dns_name_copy(zcname, foundname, NULL);
1921                 if (result != ISC_R_SUCCESS)
1922                         return (result);
1923         }
1924         if (nodep != NULL) {
1925                 /*
1926                  * Note that we don't have to increment the node's reference
1927                  * count here because we're going to use the reference we
1928                  * already have in the search block.
1929                  */
1930                 *nodep = node;
1931                 search->need_cleanup = ISC_FALSE;
1932         }
1933         if (rdataset != NULL) {
                     /*
                      * bind_rdataset() requires the node lock to be held.
                      */
1934                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
1935                           isc_rwlocktype_read);
1936                 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
1937                               search->now, rdataset);
1938                 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
1939                         bind_rdataset(search->rbtdb, node,
1940                                       search->zonecut_sigrdataset,
1941                                       search->now, sigrdataset);
1942                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
1943                             isc_rwlocktype_read);
1944         }
1945
1946         if (type == dns_rdatatype_dname)
1947                 return (DNS_R_DNAME);
1948         return (DNS_R_DELEGATION);
1949 }
1950
/*
 * Decide whether an rdataset of type 'type' at 'node', owned by 'name',
 * may be returned as glue for the zone cut recorded in 'search'.
 *
 * The type must be A, AAAA or A6, or NS when 'node' is the zone cut
 * node itself.  In addition, 'name' must compare equal to one of the
 * names stored as rdata in search->zonecut_rdataset; each rdata in that
 * slab is interpreted directly as a domain name.
 *
 * Returns ISC_TRUE if both conditions hold, ISC_FALSE otherwise.
 */
1951 static inline isc_boolean_t
1952 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
1953            dns_rbtnode_t *node)
1954 {
1955         unsigned char *raw;     /* RDATASLAB */
1956         unsigned int count, size;
1957         dns_name_t ns_name;
1958         isc_boolean_t valid = ISC_FALSE;
1959         dns_offsets_t offsets;
1960         isc_region_t region;
1961         rdatasetheader_t *header;
1962
1963         /*
1964          * No additional locking is required.
1965          */
1966
1967         /*
1968          * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
1969          * if it occurs at a zone cut, but is not valid below it.
1970          */
1971         if (type == dns_rdatatype_ns) {
1972                 if (node != search->zonecut) {
1973                         return (ISC_FALSE);
1974                 }
1975         } else if (type != dns_rdatatype_a &&
1976                    type != dns_rdatatype_aaaa &&
1977                    type != dns_rdatatype_a6) {
1978                 return (ISC_FALSE);
1979         }
1980
             /*
              * The slab follows the header in memory and begins with a
              * two-byte record count, most significant byte first.
              */
1981         header = search->zonecut_rdataset;
1982         raw = (unsigned char *)header + sizeof(*header);
1983         count = raw[0] * 256 + raw[1];
             /*
              * With DNS_RDATASET_FIXED a further 4 bytes per record are
              * skipped before the first record (presumably an offset
              * table -- confirm against rdataslab.c).
              */
1984 #if DNS_RDATASET_FIXED
1985         raw += 2 + (4 * count);
1986 #else 
1987         raw += 2;
1988 #endif
1989
1990         while (count > 0) {
1991                 count--;
                     /*
                      * Each record starts with a two-byte length; with
                      * DNS_RDATASET_FIXED two more bytes follow it before
                      * the rdata (presumably an order field -- confirm).
                      */
1992                 size = raw[0] * 256 + raw[1];
1993 #if DNS_RDATASET_FIXED
1994                 raw += 4;
1995 #else
1996                 raw += 2;
1997 #endif
1998                 region.base = raw;
1999                 region.length = size;
2000                 raw += size;
2001                 /*
2002                  * XXX Until we have rdata structures, we have no choice but
2003                  * to directly access the rdata format.
2004                  */
2005                 dns_name_init(&ns_name, offsets);
2006                 dns_name_fromregion(&ns_name, &region);
2007                 if (dns_name_compare(&ns_name, name) == 0) {
2008                         valid = ISC_TRUE;
2009                         break;
2010                 }
2011         }
2012
2013         return (valid);
2014 }
2015
/*
 * Determine whether the first active node after the current position
 * of 'chain' is 'name' itself or lies beneath it.
 *
 * A node is "active" here if it has a header with
 * serial <= search->serial that is not IGNOREd and EXISTS.  Only the
 * top header of each type (the 'next' list) is inspected.
 *
 * 'chain' is the caller's chain and is advanced by this function.
 *
 * Returns ISC_TRUE if such a node is found, its full name can be built,
 * and that name is a subdomain of 'name'; ISC_FALSE in every other
 * case, including any chain or name-concatenation failure.
 */
2016 static inline isc_boolean_t
2017 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2018             dns_name_t *name)
2019 {
2020         dns_fixedname_t fnext;
2021         dns_fixedname_t forigin;
2022         dns_name_t *next;
2023         dns_name_t *origin;
2024         dns_name_t prefix;
2025         dns_rbtdb_t *rbtdb;
2026         dns_rbtnode_t *node;
2027         isc_result_t result;
2028         isc_boolean_t answer = ISC_FALSE;
2029         rdatasetheader_t *header;
2030
2031         rbtdb = search->rbtdb;
2032
2033         dns_name_init(&prefix, NULL);
2034         dns_fixedname_init(&fnext);
2035         next = dns_fixedname_name(&fnext);
2036         dns_fixedname_init(&forigin);
2037         origin = dns_fixedname_name(&forigin);
2038
2039         result = dns_rbtnodechain_next(chain, NULL, NULL);
2040         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2041                 node = NULL;
2042                 result = dns_rbtnodechain_current(chain, &prefix,
2043                                                   origin, &node);
2044                 if (result != ISC_R_SUCCESS)
2045                         break;
2046                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2047                           isc_rwlocktype_read);
2048                 for (header = node->data;
2049                      header != NULL;
2050                      header = header->next) {
2051                         if (header->serial <= search->serial &&
2052                             !IGNORE(header) && EXISTS(header))
2053                                 break;
2054                 }
2055                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2056                             isc_rwlocktype_read);
                     /*
                      * Stop at the first active node; 'result' is still
                      * ISC_R_SUCCESS from dns_rbtnodechain_current().
                      */
2057                 if (header != NULL)
2058                         break;
2059                 result = dns_rbtnodechain_next(chain, NULL, NULL);
2060         }
             /*
              * 'result' is ISC_R_SUCCESS only if an active node was found;
              * build its absolute name from the chain's prefix and origin.
              */
2061         if (result == ISC_R_SUCCESS)
2062                 result = dns_name_concatenate(&prefix, origin, next, NULL);
2063         if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
2064                 answer = ISC_TRUE;
2065         return (answer);
2066 }
2067
/*
 * Determine whether 'qname', or one of its ancestors below the
 * terminal name of the wildcard 'wname', has an active node at or
 * beneath it.
 *
 * Working on a private copy of search->chain, this first walks
 * backwards (starting with the chain's current node) to the nearest
 * active node, giving 'prev', and then walks forwards from wherever
 * the backward walk stopped to the next active node, giving 'next'.
 * "Active" means the node has a header with serial <= search->serial
 * that is not IGNOREd and EXISTS.
 *
 * 'qname' is then shortened one label at a time from the left until it
 * equals 'wname' without its first (wildcard) label.  If at any step
 * 'prev' or 'next' is a subdomain of the shortened name, the answer is
 * ISC_TRUE; otherwise ISC_FALSE.
 *
 * NOTE(review): the identifier looks like a misspelling of
 * "activeemptynode"; it is left as is because callers are outside this
 * view.
 * NOTE(review): the final loop only terminates if 'qname' is a proper
 * subdomain of the terminal name -- presumably guaranteed by the
 * caller; confirm.
 */
2068 static inline isc_boolean_t
2069 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2070         dns_fixedname_t fnext;
2071         dns_fixedname_t forigin;
2072         dns_fixedname_t fprev;
2073         dns_name_t *next;
2074         dns_name_t *origin;
2075         dns_name_t *prev;
2076         dns_name_t name;
2077         dns_name_t rname;
2078         dns_name_t tname;
2079         dns_rbtdb_t *rbtdb;
2080         dns_rbtnode_t *node;
2081         dns_rbtnodechain_t chain;
2082         isc_boolean_t check_next = ISC_TRUE;
2083         isc_boolean_t check_prev = ISC_TRUE;
2084         isc_boolean_t answer = ISC_FALSE;
2085         isc_result_t result;
2086         rdatasetheader_t *header;
2087         unsigned int n;
2088
2089         rbtdb = search->rbtdb;
2090
2091         dns_name_init(&name, NULL);
2092         dns_name_init(&tname, NULL);
2093         dns_name_init(&rname, NULL);
2094         dns_fixedname_init(&fnext);
2095         next = dns_fixedname_name(&fnext);
2096         dns_fixedname_init(&fprev);
2097         prev = dns_fixedname_name(&fprev);
2098         dns_fixedname_init(&forigin);
2099         origin = dns_fixedname_name(&forigin);
2100
2101         /*
2102          * Find if qname is at or below an empty node.
2103          * Use our own copy of the chain.
2104          */
2105
2106         chain = search->chain;
             /*
              * Backward pass: locate the nearest active node at or before
              * the chain's current position.
              */
2107         do {
2108                 node = NULL;
2109                 result = dns_rbtnodechain_current(&chain, &name,
2110                                                   origin, &node);
2111                 if (result != ISC_R_SUCCESS)
2112                         break;
2113                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2114                           isc_rwlocktype_read);
2115                 for (header = node->data;
2116                      header != NULL;
2117                      header = header->next) {
2118                         if (header->serial <= search->serial &&
2119                             !IGNORE(header) && EXISTS(header))
2120                                 break;
2121                 }
2122                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2123                             isc_rwlocktype_read);
2124                 if (header != NULL)
2125                         break;
2126                 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
2127         } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
             /*
              * 'prev' is only usable if an active node was found and its
              * absolute name could be built.
              */
2128         if (result == ISC_R_SUCCESS)
2129                 result = dns_name_concatenate(&name, origin, prev, NULL);
2130         if (result != ISC_R_SUCCESS)
2131                 check_prev = ISC_FALSE;
2132
             /*
              * Forward pass: continue from where the backward pass left
              * the chain and locate the next active node.
              */
2133         result = dns_rbtnodechain_next(&chain, NULL, NULL);
2134         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2135                 node = NULL;
2136                 result = dns_rbtnodechain_current(&chain, &name,
2137                                                   origin, &node);
2138                 if (result != ISC_R_SUCCESS)
2139                         break;
2140                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2141                           isc_rwlocktype_read);
2142                 for (header = node->data;
2143                      header != NULL;
2144                      header = header->next) {
2145                         if (header->serial <= search->serial &&
2146                             !IGNORE(header) && EXISTS(header))
2147                                 break;
2148                 }
2149                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2150                             isc_rwlocktype_read);
2151                 if (header != NULL)
2152                         break;
2153                 result = dns_rbtnodechain_next(&chain, NULL, NULL);
2154         }
2155         if (result == ISC_R_SUCCESS)
2156                 result = dns_name_concatenate(&name, origin, next, NULL);
2157         if (result != ISC_R_SUCCESS)
2158                 check_next = ISC_FALSE;
2159
2160         dns_name_clone(qname, &rname);
2161
2162         /*
2163          * Remove the wildcard label to find the terminal name.
2164          */
2165         n = dns_name_countlabels(wname);
2166         dns_name_getlabelsequence(wname, 1, n - 1, &tname);
2167
             /*
              * Test 'rname' and each successively shorter ancestor until
              * the terminal name is reached (the terminal name itself is
              * not tested).
              */
2168         do {
2169                 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
2170                     (check_next && dns_name_issubdomain(next, &rname))) {
2171                         answer = ISC_TRUE;
2172                         break;
2173                 }
2174                 /*
2175                  * Remove the left hand label.
2176                  */
2177                 n = dns_name_countlabels(&rname);
2178                 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
2179         } while (!dns_name_equal(&rname, &tname));
2180         return (answer);
2181 }
2182
2183 static inline isc_result_t
2184 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
2185               dns_name_t *qname)
2186 {
2187         unsigned int i, j;
2188         dns_rbtnode_t *node, *level_node, *wnode;
2189         rdatasetheader_t *header;
2190         isc_result_t result = ISC_R_NOTFOUND;
2191         dns_name_t name;
2192         dns_name_t *wname;
2193         dns_fixedname_t fwname;
2194         dns_rbtdb_t *rbtdb;
2195         isc_boolean_t done, wild, active;
2196         dns_rbtnodechain_t wchain;
2197
2198         /*
2199          * Caller must be holding the tree lock and MUST NOT be holding
2200          * any node locks.
2201          */
2202
2203         /*
2204          * Examine each ancestor level.  If the level's wild bit
2205          * is set, then construct the corresponding wildcard name and
2206          * search for it.  If the wildcard node exists, and is active in
2207          * this version, we're done.  If not, then we next check to see
2208          * if the ancestor is active in this version.  If so, then there
2209          * can be no possible wildcard match and again we're done.  If not,
2210          * continue the search.
2211          */
2212
2213         rbtdb = search->rbtdb;
2214         i = search->chain.level_matches;
2215         done = ISC_FALSE;
2216         node = *nodep;
2217         do {
2218                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2219                           isc_rwlocktype_read);
2220
2221                 /*
2222                  * First we try to figure out if this node is active in
2223                  * the search's version.  We do this now, even though we
2224                  * may not need the information, because it simplifies the
2225                  * locking and code flow.
2226                  */
2227                 for (header = node->data;
2228                      header != NULL;
2229                      header = header->next) {
2230                         if (header->serial <= search->serial &&
2231                             !IGNORE(header) && EXISTS(header))
2232                                 break;
2233                 }
2234                 if (header != NULL)
2235                         active = ISC_TRUE;
2236                 else
2237                         active = ISC_FALSE;
2238
2239                 if (node->wild)
2240                         wild = ISC_TRUE;
2241                 else
2242                         wild = ISC_FALSE;
2243
2244                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2245                             isc_rwlocktype_read);
2246
2247                 if (wild) {
2248                         /*
2249                          * Construct the wildcard name for this level.
2250                          */
2251                         dns_name_init(&name, NULL);
2252                         dns_rbt_namefromnode(node, &name);
2253                         dns_fixedname_init(&fwname);
2254                         wname = dns_fixedname_name(&fwname);
2255                         result = dns_name_concatenate(dns_wildcardname, &name,
2256                                                       wname, NULL);
2257                         j = i;
2258                         while (result == ISC_R_SUCCESS && j != 0) {
2259                                 j--;
2260                                 level_node = search->chain.levels[j];
2261                                 dns_name_init(&name, NULL);
2262                                 dns_rbt_namefromnode(level_node, &name);
2263                                 result = dns_name_concatenate(wname,
2264                                                               &name,
2265                                                               wname,
2266                                                               NULL);
2267                         }
2268                         if (result != ISC_R_SUCCESS)
2269                                 break;
2270
2271                         wnode = NULL;
2272                         dns_rbtnodechain_init(&wchain, NULL);
2273                         result = dns_rbt_findnode(rbtdb->tree, wname,
2274                                                   NULL, &wnode, &wchain,
2275                                                   DNS_RBTFIND_EMPTYDATA,
2276                                                   NULL, NULL);
2277                         if (result == ISC_R_SUCCESS) {
2278                                 nodelock_t *lock;
2279
2280                                 /*
2281                                  * We have found the wildcard node.  If it
2282                                  * is active in the search's version, we're
2283                                  * done.
2284                                  */
2285                                 lock = &rbtdb->node_locks[wnode->locknum].lock;
2286                                 NODE_LOCK(lock, isc_rwlocktype_read);
2287                                 for (header = wnode->data;
2288                                      header != NULL;
2289                                      header = header->next) {
2290                                         if (header->serial <= search->serial &&
2291                                             !IGNORE(header) && EXISTS(header))
2292                                                 break;
2293                                 }
2294                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
2295                                 if (header != NULL ||
2296                                     activeempty(search, &wchain, wname)) {
2297                                         if (activeemtpynode(search, qname,
2298                                                             wname)) {
2299                                                 return (ISC_R_NOTFOUND);
2300                                         }
2301                                         /*
2302                                          * The wildcard node is active!
2303                                          *
2304                                          * Note: result is still ISC_R_SUCCESS
2305                                          * so we don't have to set it.
2306                                          */
2307                                         *nodep = wnode;
2308                                         break;
2309                                 }
2310                         } else if (result != ISC_R_NOTFOUND &&
2311                                    result != DNS_R_PARTIALMATCH) {
2312                                 /*
2313                                  * An error has occurred.  Bail out.
2314                                  */
2315                                 break;
2316                         }
2317                 }
2318
2319                 if (active) {
2320                         /*
2321                          * The level node is active.  Any wildcarding
2322                          * present at higher levels has no
2323                          * effect and we're done.
2324                          */
2325                         result = ISC_R_NOTFOUND;
2326                         break;
2327                 }
2328
2329                 if (i > 0) {
2330                         i--;
2331                         node = search->chain.levels[i];
2332                 } else
2333                         done = ISC_TRUE;
2334         } while (!done);
2335
2336         return (result);
2337 }
2338
2339 static inline isc_result_t
2340 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
2341                   dns_name_t *foundname, dns_rdataset_t *rdataset,
2342                   dns_rdataset_t *sigrdataset, isc_boolean_t need_sig)
2343 {
2344         dns_rbtnode_t *node;
2345         rdatasetheader_t *header, *header_next, *found, *foundsig;
2346         isc_boolean_t empty_node;
2347         isc_result_t result;
2348         dns_fixedname_t fname, forigin;
2349         dns_name_t *name, *origin;
2350
2351         do {
2352                 node = NULL;
2353                 dns_fixedname_init(&fname);
2354                 name = dns_fixedname_name(&fname);
2355                 dns_fixedname_init(&forigin);
2356                 origin = dns_fixedname_name(&forigin);
2357                 result = dns_rbtnodechain_current(&search->chain, name,
2358                                                   origin, &node);
2359                 if (result != ISC_R_SUCCESS)
2360                         return (result);
2361                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2362                           isc_rwlocktype_read);
2363                 found = NULL;
2364                 foundsig = NULL;
2365                 empty_node = ISC_TRUE;
2366                 for (header = node->data;
2367                      header != NULL;
2368                      header = header_next) {
2369                         header_next = header->next;
2370                         /*
2371                          * Look for an active, extant NSEC or RRSIG NSEC.
2372                          */
2373                         do {
2374                                 if (header->serial <= search->serial &&
2375                                     !IGNORE(header)) {
2376                                         /*
2377                                          * Is this a "this rdataset doesn't
2378                                          * exist" record?
2379                                          */
2380                                         if (NONEXISTENT(header))
2381                                                 header = NULL;
2382                                         break;
2383                                 } else
2384                                         header = header->down;
2385                         } while (header != NULL);
2386                         if (header != NULL) {
2387                                 /*
2388                                  * We now know that there is at least one
2389                                  * active rdataset at this node.
2390                                  */
2391                                 empty_node = ISC_FALSE;
2392                                 if (header->type == dns_rdatatype_nsec) {
2393                                         found = header;
2394                                         if (foundsig != NULL)
2395                                                 break;
2396                                 } else if (header->type ==
2397                                            RBTDB_RDATATYPE_SIGNSEC) {
2398                                         foundsig = header;
2399                                         if (found != NULL)
2400                                                 break;
2401                                 }
2402                         }
2403                 }
2404                 if (!empty_node) {
2405                         if (found != NULL &&
2406                             (foundsig != NULL || !need_sig))
2407                         {
2408                                 /*
2409                                  * We've found the right NSEC record.
2410                                  *
2411                                  * Note: for this to really be the right
2412                                  * NSEC record, it's essential that the NSEC
2413                                  * records of any nodes obscured by a zone
2414                                  * cut have been removed; we assume this is
2415                                  * the case.
2416                                  */
2417                                 result = dns_name_concatenate(name, origin,
2418                                                               foundname, NULL);
2419                                 if (result == ISC_R_SUCCESS) {
2420                                         if (nodep != NULL) {
2421                                                 new_reference(search->rbtdb,
2422                                                               node);
2423                                                 *nodep = node;
2424                                         }
2425                                         bind_rdataset(search->rbtdb, node,
2426                                                       found, search->now,
2427                                                       rdataset);
2428                                         if (foundsig != NULL)
2429                                                 bind_rdataset(search->rbtdb,
2430                                                               node,
2431                                                               foundsig,
2432                                                               search->now,
2433                                                               sigrdataset);
2434                                 }
2435                         } else if (found == NULL && foundsig == NULL) {
2436                                 /*
2437                                  * This node is active, but has no NSEC or
2438                                  * RRSIG NSEC.  That means it's glue or
2439                                  * other obscured zone data that isn't
2440                                  * relevant for our search.  Treat the
2441                                  * node as if it were empty and keep looking.
2442                                  */
2443                                 empty_node = ISC_TRUE;
2444                                 result = dns_rbtnodechain_prev(&search->chain,
2445                                                                NULL, NULL);
2446                         } else {
2447                                 /*
2448                                  * We found an active node, but either the
2449                                  * NSEC or the RRSIG NSEC is missing.  This
2450                                  * shouldn't happen.
2451                                  */
2452                                 result = DNS_R_BADDB;
2453                         }
2454                 } else {
2455                         /*
2456                          * This node isn't active.  We've got to keep
2457                          * looking.
2458                          */
2459                         result = dns_rbtnodechain_prev(&search->chain, NULL,
2460                                                        NULL);
2461                 }
2462                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2463                             isc_rwlocktype_read);
2464         } while (empty_node && result == ISC_R_SUCCESS);
2465
2466         /*
2467          * If the result is ISC_R_NOMORE, then we got to the beginning of
2468          * the database and didn't find a NSEC record.  This shouldn't
2469          * happen.
2470          */
2471         if (result == ISC_R_NOMORE)
2472                 result = DNS_R_BADDB;
2473
2474         return (result);
2475 }
2476
2477 static isc_result_t
2478 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
2479           dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
2480           dns_dbnode_t **nodep, dns_name_t *foundname,
2481           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
2482 {
2483         dns_rbtnode_t *node = NULL;
2484         isc_result_t result;
2485         rbtdb_search_t search;
2486         isc_boolean_t cname_ok = ISC_TRUE;
2487         isc_boolean_t close_version = ISC_FALSE;
2488         isc_boolean_t maybe_zonecut = ISC_FALSE;
2489         isc_boolean_t at_zonecut = ISC_FALSE;
2490         isc_boolean_t wild;
2491         isc_boolean_t empty_node;
2492         rdatasetheader_t *header, *header_next, *found, *nsecheader;
2493         rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
2494         rbtdb_rdatatype_t sigtype;
2495         isc_boolean_t active;
2496         dns_rbtnodechain_t chain;
2497         nodelock_t *lock;
2498
2499
2500         search.rbtdb = (dns_rbtdb_t *)db;
2501
2502         REQUIRE(VALID_RBTDB(search.rbtdb));
2503
2504         /*
2505          * We don't care about 'now'.
2506          */
2507         UNUSED(now);
2508
2509         /*
2510          * If the caller didn't supply a version, attach to the current
2511          * version.
2512          */
2513         if (version == NULL) {
2514                 currentversion(db, &version);
2515                 close_version = ISC_TRUE;
2516         }
2517
2518         search.rbtversion = version;
2519         search.serial = search.rbtversion->serial;
2520         search.options = options;
2521         search.copy_name = ISC_FALSE;
2522         search.need_cleanup = ISC_FALSE;
2523         search.wild = ISC_FALSE;
2524         search.zonecut = NULL;
2525         dns_fixedname_init(&search.zonecut_name);
2526         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
2527         search.now = 0;
2528
2529         /*
2530          * 'wild' will be true iff. we've matched a wildcard.
2531          */
2532         wild = ISC_FALSE;
2533
2534         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
2535
2536         /*
2537          * Search down from the root of the tree.  If, while going down, we
2538          * encounter a callback node, zone_zonecut_callback() will search the
2539          * rdatasets at the zone cut for active DNAME or NS rdatasets.
2540          */
2541         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
2542                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
2543                                   zone_zonecut_callback, &search);
2544
2545         if (result == DNS_R_PARTIALMATCH) {
2546         partial_match:
2547                 if (search.zonecut != NULL) {
2548                     result = setup_delegation(&search, nodep, foundname,
2549                                               rdataset, sigrdataset);
2550                     goto tree_exit;
2551                 }
2552
2553                 if (search.wild) {
2554                         /*
2555                          * At least one of the levels in the search chain
2556                          * potentially has a wildcard.  For each such level,
2557                          * we must see if there's a matching wildcard active
2558                          * in the current version.
2559                          */
2560                         result = find_wildcard(&search, &node, name);
2561                         if (result == ISC_R_SUCCESS) {
2562                                 result = dns_name_copy(name, foundname, NULL);
2563                                 if (result != ISC_R_SUCCESS)
2564                                         goto tree_exit;
2565                                 wild = ISC_TRUE;
2566                                 goto found;
2567                         }
2568                         else if (result != ISC_R_NOTFOUND)
2569                                 goto tree_exit;
2570                 }
2571
2572                 chain = search.chain;
2573                 active = activeempty(&search, &chain, name);
2574
2575                 /*
2576                  * If we're here, then the name does not exist, is not
2577                  * beneath a zonecut, and there's no matching wildcard.
2578                  */
2579                 if (search.rbtdb->secure ||
2580                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
2581                 {
2582                         result = find_closest_nsec(&search, nodep, foundname,
2583                                                   rdataset, sigrdataset,
2584                                                   search.rbtdb->secure);
2585                         if (result == ISC_R_SUCCESS)
2586                                 result = active ? DNS_R_EMPTYNAME :
2587                                                   DNS_R_NXDOMAIN;
2588                 } else
2589                         result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
2590                 goto tree_exit;
2591         } else if (result != ISC_R_SUCCESS)
2592                 goto tree_exit;
2593
2594  found:
2595         /*
2596          * We have found a node whose name is the desired name, or we
2597          * have matched a wildcard.
2598          */
2599
2600         if (search.zonecut != NULL) {
2601                 /*
2602                  * If we're beneath a zone cut, we don't want to look for
2603                  * CNAMEs because they're not legitimate zone glue.
2604                  */
2605                 cname_ok = ISC_FALSE;
2606         } else {
2607                 /*
2608                  * The node may be a zone cut itself.  If it might be one,
2609                  * make sure we check for it later.
2610                  */
2611                 if (node->find_callback &&
2612                     (node != search.rbtdb->origin_node ||
2613                      IS_STUB(search.rbtdb)) &&
2614                     !dns_rdatatype_atparent(type))
2615                         maybe_zonecut = ISC_TRUE;
2616         }
2617
2618         /*
2619          * Certain DNSSEC types are not subject to CNAME matching
2620          * (RFC4035, section 2.5 and RFC3007).
2621          *
2622          * We don't check for RRSIG, because we don't store RRSIG records
2623          * directly.
2624          */
2625         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
2626                 cname_ok = ISC_FALSE;
2627
2628         /*
2629          * We now go looking for rdata...
2630          */
2631
2632         NODE_LOCK(&(search.rbtdb->node_locks[node->locknum].lock),
2633                   isc_rwlocktype_read);
2634
2635         found = NULL;
2636         foundsig = NULL;
2637         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
2638         nsecheader = NULL;
2639         nsecsig = NULL;
2640         cnamesig = NULL;
2641         empty_node = ISC_TRUE;
2642         for (header = node->data; header != NULL; header = header_next) {
2643                 header_next = header->next;
2644                 /*
2645                  * Look for an active, extant rdataset.
2646                  */
2647                 do {
2648                         if (header->serial <= search.serial &&
2649                             !IGNORE(header)) {
2650                                 /*
2651                                  * Is this a "this rdataset doesn't
2652                                  * exist" record?
2653                                  */
2654                                 if (NONEXISTENT(header))
2655                                         header = NULL;
2656                                 break;
2657                         } else
2658                                 header = header->down;
2659                 } while (header != NULL);
2660                 if (header != NULL) {
2661                         /*
2662                          * We now know that there is at least one active
2663                          * rdataset at this node.
2664                          */
2665                         empty_node = ISC_FALSE;
2666
2667                         /*
2668                          * Do special zone cut handling, if requested.
2669                          */
2670                         if (maybe_zonecut &&
2671                             header->type == dns_rdatatype_ns) {
2672                                 /*
2673                                  * We increment the reference count on node to
2674                                  * ensure that search->zonecut_rdataset will
2675                                  * still be valid later.
2676                                  */
2677                                 new_reference(search.rbtdb, node);
2678                                 search.zonecut = node;
2679                                 search.zonecut_rdataset = header;
2680                                 search.zonecut_sigrdataset = NULL;
2681                                 search.need_cleanup = ISC_TRUE;
2682                                 maybe_zonecut = ISC_FALSE;
2683                                 at_zonecut = ISC_TRUE;
2684                                 /*
2685                                  * It is not clear if KEY should still be
2686                                  * allowed at the parent side of the zone
2687                                  * cut or not.  It is needed for RFC3007
2688                                  * validated updates.
2689                                  */
2690                                 if ((search.options & DNS_DBFIND_GLUEOK) == 0
2691                                     && type != dns_rdatatype_nsec
2692                                     && type != dns_rdatatype_key) {
2693                                         /*
2694                                          * Glue is not OK, but any answer we
2695                                          * could return would be glue.  Return
2696                                          * the delegation.
2697                                          */
2698                                         found = NULL;
2699                                         break;
2700                                 }
2701                                 if (found != NULL && foundsig != NULL)
2702                                         break;
2703                         }
2704
2705                         /*
2706                          * If we found a type we were looking for,
2707                          * remember it.
2708                          */
2709                         if (header->type == type ||
2710                             type == dns_rdatatype_any ||
2711                             (header->type == dns_rdatatype_cname &&
2712                              cname_ok)) {
2713                                 /*
2714                                  * We've found the answer!
2715                                  */
2716                                 found = header;
2717                                 if (header->type == dns_rdatatype_cname &&
2718                                     cname_ok) {
2719                                         /*
2720                                          * We may be finding a CNAME instead
2721                                          * of the desired type.
2722                                          *
2723                                          * If we've already got the CNAME RRSIG,
2724                                          * use it, otherwise change sigtype
2725                                          * so that we find it.
2726                                          */
2727                                         if (cnamesig != NULL)
2728                                                 foundsig = cnamesig;
2729                                         else
2730                                                 sigtype =
2731                                                     RBTDB_RDATATYPE_SIGCNAME;
2732                                 }
2733                                 /*
2734                                  * If we've got all we need, end the search.
2735                                  */
2736                                 if (!maybe_zonecut && foundsig != NULL)
2737                                         break;
2738                         } else if (header->type == sigtype) {
2739                                 /*
2740                                  * We've found the RRSIG rdataset for our
2741                                  * target type.  Remember it.
2742                                  */
2743                                 foundsig = header;
2744                                 /*
2745                                  * If we've got all we need, end the search.
2746                                  */
2747                                 if (!maybe_zonecut && found != NULL)
2748                                         break;
2749                         } else if (header->type == dns_rdatatype_nsec) {
2750                                 /*
2751                                  * Remember a NSEC rdataset even if we're
2752                                  * not specifically looking for it, because
2753                                  * we might need it later.
2754                                  */
2755                                 nsecheader = header;
2756                         } else if (header->type == RBTDB_RDATATYPE_SIGNSEC) {
2757                                 /*
2758                                  * If we need the NSEC rdataset, we'll also
2759                                  * need its signature.
2760                                  */
2761                                 nsecsig = header;
2762                         } else if (cname_ok &&
2763                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
2764                                 /*
2765                                  * If we get a CNAME match, we'll also need
2766                                  * its signature.
2767                                  */
2768                                 cnamesig = header;
2769                         }
2770                 }
2771         }
2772
2773         if (empty_node) {
2774                 /*
2775                  * We have an exact match for the name, but there are no
2776                  * active rdatasets in the desired version.  That means that
2777                  * this node doesn't exist in the desired version, and that
2778                  * we really have a partial match.
2779                  */
2780                 if (!wild) {
2781                         lock = &search.rbtdb->node_locks[node->locknum].lock;
2782                         NODE_UNLOCK(lock, isc_rwlocktype_read);
2783                         goto partial_match;
2784                 }
2785         }
2786
2787         /*
2788          * If we didn't find what we were looking for...
2789          */
2790         if (found == NULL) {
2791                 if (search.zonecut != NULL) {
2792                         /*
2793                          * We were trying to find glue at a node beneath a
2794                          * zone cut, but didn't.
2795                          *
2796                          * Return the delegation.
2797                          */
2798                         lock = &search.rbtdb->node_locks[node->locknum].lock;
2799                         NODE_UNLOCK(lock, isc_rwlocktype_read);
2800                         result = setup_delegation(&search, nodep, foundname,
2801                                                   rdataset, sigrdataset);
2802                         goto tree_exit;
2803                 }
2804                 /*
2805                  * The desired type doesn't exist.
2806                  */
2807                 result = DNS_R_NXRRSET;
2808                 if (search.rbtdb->secure &&
2809                     (nsecheader == NULL || nsecsig == NULL)) {
2810                         /*
2811                          * The zone is secure but there's no NSEC,
2812                          * or the NSEC has no signature!
2813                          */
2814                         if (!wild) {
2815                                 result = DNS_R_BADDB;
2816                                 goto node_exit;
2817                         }
2818
2819                         lock = &search.rbtdb->node_locks[node->locknum].lock;
2820                         NODE_UNLOCK(lock, isc_rwlocktype_read);
2821                         result = find_closest_nsec(&search, nodep, foundname,
2822                                                    rdataset, sigrdataset,
2823                                                    search.rbtdb->secure);
2824                         if (result == ISC_R_SUCCESS)
2825                                 result = DNS_R_EMPTYWILD;
2826                         goto tree_exit;
2827                 }
2828                 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
2829                     nsecheader == NULL)
2830                 {
2831                         /*
2832                          * There's no NSEC record, and we were told
2833                          * to find one.
2834                          */
2835                         result = DNS_R_BADDB;
2836                         goto node_exit;
2837                 }
2838                 if (nodep != NULL) {
2839                         new_reference(search.rbtdb, node);
2840                         *nodep = node;
2841                 }
2842                 if (search.rbtdb->secure ||
2843                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
2844                 {
2845                         bind_rdataset(search.rbtdb, node, nsecheader,
2846                                       0, rdataset);
2847                         if (nsecsig != NULL)
2848                                 bind_rdataset(search.rbtdb, node,
2849                                               nsecsig, 0, sigrdataset);
2850                 }
2851                 if (wild)
2852                         foundname->attributes |= DNS_NAMEATTR_WILDCARD;
2853                 goto node_exit;
2854         }
2855
2856         /*
2857          * We found what we were looking for, or we found a CNAME.
2858          */
2859
2860         if (type != found->type &&
2861             type != dns_rdatatype_any &&
2862             found->type == dns_rdatatype_cname) {
2863                 /*
2864                  * We weren't doing an ANY query and we found a CNAME instead
2865                  * of the type we were looking for, so we need to indicate
2866                  * that result to the caller.
2867                  */
2868                 result = DNS_R_CNAME;
2869         } else if (search.zonecut != NULL) {
2870                 /*
2871                  * If we're beneath a zone cut, we must indicate that the
2872                  * result is glue, unless we're actually at the zone cut
2873                  * and the type is NSEC or KEY.
2874                  */
2875                 if (search.zonecut == node) {
2876                         /*
2877                          * It is not clear if KEY should still be
2878                          * allowed at the parent side of the zone
2879                          * cut or not.  It is needed for RFC3007
2880                          * validated updates.
2881                          */
2882                         if (type == dns_rdatatype_nsec ||
2883                             type == dns_rdatatype_key)
2884                                 result = ISC_R_SUCCESS;
2885                         else if (type == dns_rdatatype_any)
2886                                 result = DNS_R_ZONECUT;
2887                         else
2888                                 result = DNS_R_GLUE;
2889                 } else
2890                         result = DNS_R_GLUE;
2891                 /*
2892                  * We might have found data that isn't glue, but was occluded
2893                  * by a dynamic update.  If the caller cares about this, they
2894                  * will have told us to validate glue.
2895                  *
2896                  * XXX We should cache the glue validity state!
2897                  */
2898                 if (result == DNS_R_GLUE &&
2899                     (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
2900                     !valid_glue(&search, foundname, type, node)) {
2901                         lock = &search.rbtdb->node_locks[node->locknum].lock;
2902                         NODE_UNLOCK(lock, isc_rwlocktype_read);
2903                         result = setup_delegation(&search, nodep, foundname,
2904                                                   rdataset, sigrdataset);
2905                     goto tree_exit;
2906                 }
2907         } else {
2908                 /*
2909                  * An ordinary successful query!
2910                  */
2911                 result = ISC_R_SUCCESS;
2912         }
2913
2914         if (nodep != NULL) {
2915                 if (!at_zonecut)
2916                         new_reference(search.rbtdb, node);
2917                 else
2918                         search.need_cleanup = ISC_FALSE;
2919                 *nodep = node;
2920         }
2921
2922         if (type != dns_rdatatype_any) {
2923                 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
2924                 if (foundsig != NULL)
2925                         bind_rdataset(search.rbtdb, node, foundsig, 0,
2926                                       sigrdataset);
2927         }
2928
2929         if (wild)
2930                 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
2931
2932  node_exit:
2933         NODE_UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock),
2934                     isc_rwlocktype_read);
2935
2936  tree_exit:
2937         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
2938
2939         /*
2940          * If we found a zonecut but aren't going to use it, we have to
2941          * let go of it.
2942          */
2943         if (search.need_cleanup) {
2944                 node = search.zonecut;
2945                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
2946
2947                 NODE_LOCK(lock, isc_rwlocktype_read);
2948                 decrement_reference(search.rbtdb, node, 0,
2949                                     isc_rwlocktype_read, isc_rwlocktype_none);
2950                 NODE_UNLOCK(lock, isc_rwlocktype_read);
2951         }
2952
2953         if (close_version)
2954                 closeversion(db, &version, ISC_FALSE);
2955
2956         dns_rbtnodechain_reset(&search.chain);
2957
2958         return (result);
2959 }
2960
2961 static isc_result_t
2962 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
2963                  isc_stdtime_t now, dns_dbnode_t **nodep,
2964                  dns_name_t *foundname,
2965                  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
2966 {
2967         UNUSED(db);
2968         UNUSED(name);
2969         UNUSED(options);
2970         UNUSED(now);
2971         UNUSED(nodep);
2972         UNUSED(foundname);
2973         UNUSED(rdataset);
2974         UNUSED(sigrdataset);
2975
2976         FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
2977
2978         return (ISC_R_NOTIMPLEMENTED);
2979 }
2980
2981 static isc_result_t
2982 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2983         rbtdb_search_t *search = arg;
2984         rdatasetheader_t *header, *header_prev, *header_next;
2985         rdatasetheader_t *dname_header, *sigdname_header;
2986         isc_result_t result;
2987         nodelock_t *lock;
2988         isc_rwlocktype_t locktype;
2989
2990         /* XXX comment */
2991
2992         REQUIRE(search->zonecut == NULL);
2993
2994         /*
2995          * Keep compiler silent.
2996          */
2997         UNUSED(name);
2998
2999         lock = &(search->rbtdb->node_locks[node->locknum].lock);
3000         locktype = isc_rwlocktype_read; 
3001         NODE_LOCK(lock, locktype);
3002
3003         /*
3004          * Look for a DNAME or RRSIG DNAME rdataset.
3005          */
3006         dname_header = NULL;
3007         sigdname_header = NULL;
3008         header_prev = NULL;
3009         for (header = node->data; header != NULL; header = header_next) {
3010                 header_next = header->next;
3011                 if (header->ttl <= search->now) {
3012                         /*
3013                          * This rdataset is stale.  If no one else is
3014                          * using the node, we can clean it up right
3015                          * now, otherwise we mark it as stale, and
3016                          * the node as dirty, so it will get cleaned
3017                          * up later.
3018                          */
3019                         if ((header->ttl <= search->now - RBTDB_VIRTUAL) &&
3020                             (locktype == isc_rwlocktype_write ||
3021                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3022                                 /*
3023                                  * We update the node's status only when we
3024                                  * can get write access; otherwise, we leave
3025                                  * others to this work.  Periodical cleaning
3026                                  * will eventually take the job as the last
3027                                  * resort.
3028                                  * We won't downgrade the lock, since other
3029                                  * rdatasets are probably stale, too. 
3030                                  */
3031                                 locktype = isc_rwlocktype_write;
3032
3033                                 if (dns_rbtnode_refcurrent(node) == 0) {
3034                                         isc_mem_t *mctx;
3035
3036                                         /*
3037                                          * header->down can be non-NULL if the
3038                                          * refcount has just decremented to 0
3039                                          * but decrement_reference() has not
3040                                          * performed clean_cache_node(), in
3041                                          * which case we need to purge the
3042                                          * stale headers first.
3043                                          */
3044                                         mctx = search->rbtdb->common.mctx;
3045                                         clean_stale_headers(mctx, header);
3046                                         if (header_prev != NULL)
3047                                                 header_prev->next =
3048                                                         header->next;
3049                                         else
3050                                                 node->data = header->next;
3051                                         free_rdataset(mctx, header);
3052                                 } else {
3053                                         header->attributes |=
3054                                                 RDATASET_ATTR_STALE;
3055                                         node->dirty = 1;
3056                                         header_prev = header;
3057                                 }
3058                         } else
3059                                 header_prev = header;
3060                 } else if (header->type == dns_rdatatype_dname &&
3061                            EXISTS(header)) {
3062                         dname_header = header;
3063                         header_prev = header;
3064                 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
3065                          EXISTS(header)) {
3066                         sigdname_header = header;
3067                         header_prev = header;
3068                 } else
3069                         header_prev = header;
3070         }
3071
3072         if (dname_header != NULL &&
3073             (dname_header->trust != dns_trust_pending ||
3074              (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
3075                 /*
3076                  * We increment the reference count on node to ensure that
3077                  * search->zonecut_rdataset will still be valid later.
3078                  */
3079                 new_reference(search->rbtdb, node);
3080                 search->zonecut = node;
3081                 search->zonecut_rdataset = dname_header;
3082                 search->zonecut_sigrdataset = sigdname_header;
3083                 search->need_cleanup = ISC_TRUE;
3084                 result = DNS_R_PARTIALMATCH;
3085         } else
3086                 result = DNS_R_CONTINUE;
3087
3088         NODE_UNLOCK(lock, locktype);
3089
3090         return (result);
3091 }
3092
3093 static inline isc_result_t
3094 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
3095                      dns_dbnode_t **nodep, dns_name_t *foundname,
3096                      dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3097 {
3098         unsigned int i;
3099         dns_rbtnode_t *level_node;
3100         rdatasetheader_t *header, *header_prev, *header_next;
3101         rdatasetheader_t *found, *foundsig;
3102         isc_result_t result = ISC_R_NOTFOUND;
3103         dns_name_t name;
3104         dns_rbtdb_t *rbtdb;
3105         isc_boolean_t done;
3106         nodelock_t *lock;
3107         isc_rwlocktype_t locktype;
3108
3109         /*
3110          * Caller must be holding the tree lock.
3111          */
3112
3113         rbtdb = search->rbtdb;
3114         i = search->chain.level_matches;
3115         done = ISC_FALSE;
3116         do {
3117                 locktype = isc_rwlocktype_read;
3118                 lock = &rbtdb->node_locks[node->locknum].lock;
3119                 NODE_LOCK(lock, locktype);
3120
3121                 /*
3122                  * Look for NS and RRSIG NS rdatasets.
3123                  */
3124                 found = NULL;
3125                 foundsig = NULL;
3126                 header_prev = NULL;
3127                 for (header = node->data;
3128                      header != NULL;
3129                      header = header_next) {
3130                         header_next = header->next;
3131                         if (header->ttl <= search->now) {
3132                                 /*
3133                                  * This rdataset is stale.  If no one else is
3134                                  * using the node, we can clean it up right
3135                                  * now, otherwise we mark it as stale, and
3136                                  * the node as dirty, so it will get cleaned
3137                                  * up later.
3138                                  */
3139                                 if ((header->ttl <= search->now -
3140                                                     RBTDB_VIRTUAL) &&
3141                                     (locktype == isc_rwlocktype_write ||
3142                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3143                                         /*
3144                                          * We update the node's status only
3145                                          * when we can get write access.
3146                                          */
3147                                         locktype = isc_rwlocktype_write;
3148
3149                                         if (dns_rbtnode_refcurrent(node)
3150                                             == 0) {
3151                                                 isc_mem_t *m;
3152
3153                                                 m = search->rbtdb->common.mctx;
3154                                                 clean_stale_headers(m, header);
3155                                                 if (header_prev != NULL)
3156                                                         header_prev->next =
3157                                                                 header->next;
3158                                                 else
3159                                                         node->data =
3160                                                                 header->next;
3161                                                 free_rdataset(m, header);
3162                                         } else {
3163                                                 header->attributes |=
3164                                                         RDATASET_ATTR_STALE;
3165                                                 node->dirty = 1;
3166                                                 header_prev = header;
3167                                         }
3168                                 } else
3169                                         header_prev = header;
3170                         } else if (EXISTS(header)) {
3171                                 /*
3172                                  * We've found an extant rdataset.  See if
3173                                  * we're interested in it.
3174                                  */
3175                                 if (header->type == dns_rdatatype_ns) {
3176                                         found = header;
3177                                         if (foundsig != NULL)
3178                                                 break;
3179                                 } else if (header->type ==
3180                                            RBTDB_RDATATYPE_SIGNS) {
3181                                         foundsig = header;
3182                                         if (found != NULL)
3183                                                 break;
3184                                 }
3185                                 header_prev = header;
3186                         } else
3187                                 header_prev = header;
3188                 }
3189
3190                 if (found != NULL) {
3191                         /*
3192                          * If we have to set foundname, we do it before
3193                          * anything else.  If we were to set foundname after
3194                          * we had set nodep or bound the rdataset, then we'd
3195                          * have to undo that work if dns_name_concatenate()
3196                          * failed.  By setting foundname first, there's
3197                          * nothing to undo if we have trouble.
3198                          */
3199                         if (foundname != NULL) {
3200                                 dns_name_init(&name, NULL);
3201                                 dns_rbt_namefromnode(node, &name);
3202                                 result = dns_name_copy(&name, foundname, NULL);
3203                                 while (result == ISC_R_SUCCESS && i > 0) {
3204                                         i--;
3205                                         level_node = search->chain.levels[i];
3206                                         dns_name_init(&name, NULL);
3207                                         dns_rbt_namefromnode(level_node,
3208                                                              &name);
3209                                         result =
3210                                                 dns_name_concatenate(foundname,
3211                                                                      &name,
3212                                                                      foundname,
3213                                                                      NULL);
3214                                 }
3215                                 if (result != ISC_R_SUCCESS) {
3216                                         *nodep = NULL;
3217                                         goto node_exit;
3218                                 }
3219                         }
3220                         result = DNS_R_DELEGATION;
3221                         if (nodep != NULL) {
3222                                 new_reference(search->rbtdb, node);
3223                                 *nodep = node;
3224                         }
3225                         bind_rdataset(search->rbtdb, node, found, search->now,
3226                                       rdataset);
3227                         if (foundsig != NULL)
3228                                 bind_rdataset(search->rbtdb, node, foundsig,
3229                                               search->now, sigrdataset);
3230                 }
3231
3232         node_exit:
3233                 NODE_UNLOCK(lock, locktype);
3234
3235                 if (found == NULL && i > 0) {
3236                         i--;
3237                         node = search->chain.levels[i];
3238                 } else
3239                         done = ISC_TRUE;
3240
3241         } while (!done);
3242
3243         return (result);
3244 }
3245
3246 static isc_result_t
3247 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3248                   isc_stdtime_t now, dns_name_t *foundname,
3249                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3250 {
3251         dns_rbtnode_t *node;
3252         rdatasetheader_t *header, *header_next, *header_prev;
3253         rdatasetheader_t *found, *foundsig;
3254         isc_boolean_t empty_node;
3255         isc_result_t result;
3256         dns_fixedname_t fname, forigin;
3257         dns_name_t *name, *origin;
3258         rbtdb_rdatatype_t matchtype, sigmatchtype;
3259         nodelock_t *lock;
3260         isc_rwlocktype_t locktype;
3261
3262         matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
3263         sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
3264                                              dns_rdatatype_nsec);
3265         
3266         do {
3267                 node = NULL;
3268                 dns_fixedname_init(&fname);
3269                 name = dns_fixedname_name(&fname);
3270                 dns_fixedname_init(&forigin);
3271                 origin = dns_fixedname_name(&forigin);
3272                 result = dns_rbtnodechain_current(&search->chain, name,
3273                                                   origin, &node);
3274                 if (result != ISC_R_SUCCESS)
3275                         return (result);
3276                 locktype = isc_rwlocktype_read;
3277                 lock = &(search->rbtdb->node_locks[node->locknum].lock);
3278                 NODE_LOCK(lock, locktype);
3279                 found = NULL;
3280                 foundsig = NULL;
3281                 empty_node = ISC_TRUE;
3282                 header_prev = NULL;
3283                 for (header = node->data;
3284                      header != NULL;
3285                      header = header_next) {
3286                         header_next = header->next;
3287                         if (header->ttl <= now) {
3288                                 /*
3289                                  * This rdataset is stale.  If no one else is
3290                                  * using the node, we can clean it up right
3291                                  * now, otherwise we mark it as stale, and the
3292                                  * node as dirty, so it will get cleaned up 
3293                                  * later.
3294                                  */
3295                                 if ((header->ttl <= now - RBTDB_VIRTUAL) &&
3296                                     (locktype == isc_rwlocktype_write ||
3297                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3298                                         /*
3299                                          * We update the node's status only
3300                                          * when we can get write access.
3301                                          */
3302                                         locktype = isc_rwlocktype_write;
3303
3304                                         if (dns_rbtnode_refcurrent(node)
3305                                             == 0) {
3306                                                 isc_mem_t *m;
3307
3308                                                 m = search->rbtdb->common.mctx;
3309                                                 clean_stale_headers(m, header);
3310                                                 if (header_prev != NULL)
3311                                                         header_prev->next =
3312                                                                 header->next;
3313                                                 else
3314                                                         node->data = header->next;
3315                                                 free_rdataset(m, header);
3316                                         } else {
3317                                                 header->attributes |=
3318                                                         RDATASET_ATTR_STALE;
3319                                                 node->dirty = 1;
3320                                                 header_prev = header;
3321                                         }
3322                                 } else
3323                                         header_prev = header;
3324                                 continue;
3325                         }
3326                         if (NONEXISTENT(header) ||
3327                             RBTDB_RDATATYPE_BASE(header->type) == 0) {
3328                                 header_prev = header;
3329                                 continue;
3330                         }
3331                         empty_node = ISC_FALSE;
3332                         if (header->type == matchtype)
3333                                 found = header;
3334                         else if (header->type == sigmatchtype)
3335                                 foundsig = header;
3336                         header_prev = header;
3337                 }
3338                 if (found != NULL) {
3339                         result = dns_name_concatenate(name, origin,
3340                                                       foundname, NULL);
3341                         if (result != ISC_R_SUCCESS)
3342                                 goto unlock_node;
3343                         bind_rdataset(search->rbtdb, node, found,
3344                                       now, rdataset);
3345                         if (foundsig != NULL)
3346                                 bind_rdataset(search->rbtdb, node, foundsig,
3347                                               now, sigrdataset);
3348                         new_reference(search->rbtdb, node);
3349                         *nodep = node;
3350                         result = DNS_R_COVERINGNSEC;
3351                 } else if (!empty_node) {
3352                         result = ISC_R_NOTFOUND;
3353                 } else
3354                         result = dns_rbtnodechain_prev(&search->chain, NULL,
3355                                                        NULL);
3356  unlock_node:
3357                 NODE_UNLOCK(lock, locktype);
3358         } while (empty_node && result == ISC_R_SUCCESS);
3359         return (result);
3360 }
3361
3362 static isc_result_t
3363 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3364            dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3365            dns_dbnode_t **nodep, dns_name_t *foundname,
3366            dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3367 {
3368         dns_rbtnode_t *node = NULL;
3369         isc_result_t result;
3370         rbtdb_search_t search;
3371         isc_boolean_t cname_ok = ISC_TRUE;
3372         isc_boolean_t empty_node;
3373         nodelock_t *lock;
3374         isc_rwlocktype_t locktype;
3375         rdatasetheader_t *header, *header_prev, *header_next;
3376         rdatasetheader_t *found, *nsheader;
3377         rdatasetheader_t *foundsig, *nssig, *cnamesig;
3378         rbtdb_rdatatype_t sigtype, negtype;
3379
3380         UNUSED(version);
3381
3382         search.rbtdb = (dns_rbtdb_t *)db;
3383
3384         REQUIRE(VALID_RBTDB(search.rbtdb));
3385         REQUIRE(version == NULL);
3386
3387         if (now == 0)
3388                 isc_stdtime_get(&now);
3389
3390         search.rbtversion = NULL;
3391         search.serial = 1;
3392         search.options = options;
3393         search.copy_name = ISC_FALSE;
3394         search.need_cleanup = ISC_FALSE;
3395         search.wild = ISC_FALSE;
3396         search.zonecut = NULL;
3397         dns_fixedname_init(&search.zonecut_name);
3398         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3399         search.now = now;
3400
3401         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3402
3403         /*
3404          * Search down from the root of the tree.  If, while going down, we
3405          * encounter a callback node, cache_zonecut_callback() will search the
3406          * rdatasets at the zone cut for a DNAME rdataset.
3407          */
3408         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
3409                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
3410                                   cache_zonecut_callback, &search);
3411
3412         if (result == DNS_R_PARTIALMATCH) {
3413                 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
3414                         result = find_coveringnsec(&search, nodep, now,
3415                                                    foundname, rdataset,
3416                                                    sigrdataset);
3417                         if (result == DNS_R_COVERINGNSEC)
3418                                 goto tree_exit;
3419                 }
3420                 if (search.zonecut != NULL) {
3421                     result = setup_delegation(&search, nodep, foundname,
3422                                               rdataset, sigrdataset);
3423                     goto tree_exit;
3424                 } else {
3425                 find_ns:
3426                         result = find_deepest_zonecut(&search, node, nodep,
3427                                                       foundname, rdataset,
3428                                                       sigrdataset);
3429                         goto tree_exit;
3430                 }
3431         } else if (result != ISC_R_SUCCESS)
3432                 goto tree_exit;
3433
3434         /*
3435          * Certain DNSSEC types are not subject to CNAME matching
3436          * (RFC4035, section 2.5 and RFC3007).
3437          *
3438          * We don't check for RRSIG, because we don't store RRSIG records
3439          * directly.
3440          */
3441         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3442                 cname_ok = ISC_FALSE;
3443
3444         /*
3445          * We now go looking for rdata...
3446          */
3447
3448         lock = &(search.rbtdb->node_locks[node->locknum].lock);
3449         locktype = isc_rwlocktype_read;
3450         NODE_LOCK(lock, locktype);
3451
3452         found = NULL;
3453         foundsig = NULL;
3454         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3455         negtype = RBTDB_RDATATYPE_VALUE(0, type);
3456         nsheader = NULL;
3457         nssig = NULL;
3458         cnamesig = NULL;
3459         empty_node = ISC_TRUE;
3460         header_prev = NULL;
3461         for (header = node->data; header != NULL; header = header_next) {
3462                 header_next = header->next;
3463                 if (header->ttl <= now) {
3464                         /*
3465                          * This rdataset is stale.  If no one else is using the
3466                          * node, we can clean it up right now, otherwise we
3467                          * mark it as stale, and the node as dirty, so it will
3468                          * get cleaned up later.
3469                          */
3470                         if ((header->ttl <= now - RBTDB_VIRTUAL) &&
3471                             (locktype == isc_rwlocktype_write ||
3472                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3473                                 /*
3474                                  * We update the node's status only when we
3475                                  * can get write access.
3476                                  */
3477                                 locktype = isc_rwlocktype_write;
3478
3479                                 if (dns_rbtnode_refcurrent(node) == 0) {
3480                                         isc_mem_t *mctx;
3481
3482                                         mctx = search.rbtdb->common.mctx;
3483                                         clean_stale_headers(mctx, header);
3484                                         if (header_prev != NULL)
3485                                                 header_prev->next =
3486                                                         header->next;
3487                                         else
3488                                                 node->data = header->next;
3489                                         free_rdataset(mctx, header);
3490                                 } else {
3491                                         header->attributes |=
3492                                                 RDATASET_ATTR_STALE;
3493                                         node->dirty = 1;
3494                                         header_prev = header;
3495                                 }
3496                         } else
3497                                 header_prev = header;
3498                 } else if (EXISTS(header)) {
3499                         /*
3500                          * We now know that there is at least one active
3501                          * non-stale rdataset at this node.
3502                          */
3503                         empty_node = ISC_FALSE;
3504
3505                         /*
3506                          * If we found a type we were looking for, remember
3507                          * it.
3508                          */
3509                         if (header->type == type ||
3510                             (type == dns_rdatatype_any &&
3511                              RBTDB_RDATATYPE_BASE(header->type) != 0) ||
3512                             (cname_ok && header->type ==
3513                              dns_rdatatype_cname)) {
3514                                 /*
3515                                  * We've found the answer.
3516                                  */
3517                                 found = header;
3518                                 if (header->type == dns_rdatatype_cname &&
3519                                     cname_ok &&
3520                                     cnamesig != NULL) {
3521                                         /*
3522                                          * If we've already got the CNAME RRSIG,
3523                                          * use it, otherwise change sigtype
3524                                          * so that we find it.
3525                                          */
3526                                         if (cnamesig != NULL)
3527                                                 foundsig = cnamesig;
3528                                         else
3529                                                 sigtype =
3530                                                     RBTDB_RDATATYPE_SIGCNAME;
3531                                         foundsig = cnamesig;
3532                                 }
3533                         } else if (header->type == sigtype) {
3534                                 /*
3535                                  * We've found the RRSIG rdataset for our
3536                                  * target type.  Remember it.
3537                                  */
3538                                 foundsig = header;
3539                         } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
3540                                    header->type == negtype) {
3541                                 /*
3542                                  * We've found a negative cache entry.
3543                                  */
3544                                 found = header;
3545                         } else if (header->type == dns_rdatatype_ns) {
3546                                 /*
3547                                  * Remember a NS rdataset even if we're
3548                                  * not specifically looking for it, because
3549                                  * we might need it later.
3550                                  */
3551                                 nsheader = header;
3552                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
3553                                 /*
3554                                  * If we need the NS rdataset, we'll also
3555                                  * need its signature.
3556                                  */
3557                                 nssig = header;
3558                         } else if (cname_ok &&
3559                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
3560                                 /*
3561                                  * If we get a CNAME match, we'll also need
3562                                  * its signature.
3563                                  */
3564                                 cnamesig = header;
3565                         }
3566                         header_prev = header;
3567                 } else
3568                         header_prev = header;
3569         }
3570
3571         if (empty_node) {
3572                 /*
3573                  * We have an exact match for the name, but there are no
3574                  * extant rdatasets.  That means that this node doesn't
3575                  * meaningfully exist, and that we really have a partial match.
3576                  */
3577                 NODE_UNLOCK(lock, locktype);
3578                 goto find_ns;
3579         }
3580
3581         /*
3582          * If we didn't find what we were looking for...
3583          */
3584         if (found == NULL ||
3585             (found->trust == dns_trust_glue &&
3586              ((options & DNS_DBFIND_GLUEOK) == 0)) ||
3587             (found->trust == dns_trust_pending &&
3588              ((options & DNS_DBFIND_PENDINGOK) == 0))) {
3589                 /*
3590                  * If there is an NS rdataset at this node, then this is the
3591                  * deepest zone cut.
3592                  */
3593                 if (nsheader != NULL) {
3594                         if (nodep != NULL) {
3595                                 new_reference(search.rbtdb, node);
3596                                 *nodep = node;
3597                         }
3598                         bind_rdataset(search.rbtdb, node, nsheader, search.now,
3599                                       rdataset);
3600                         if (nssig != NULL)
3601                                 bind_rdataset(search.rbtdb, node, nssig,
3602                                               search.now, sigrdataset);
3603                         result = DNS_R_DELEGATION;
3604                         goto node_exit;
3605                 }
3606
3607                 /*
3608                  * Go find the deepest zone cut.
3609                  */
3610                 NODE_UNLOCK(lock, locktype);
3611                 goto find_ns;
3612         }
3613
3614         /*
3615          * We found what we were looking for, or we found a CNAME.
3616          */
3617
3618         if (nodep != NULL) {
3619                 new_reference(search.rbtdb, node);
3620                 *nodep = node;
3621         }
3622
3623         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
3624                 /*
3625                  * We found a negative cache entry.
3626                  */
3627                 if (NXDOMAIN(found))
3628                         result = DNS_R_NCACHENXDOMAIN;
3629                 else
3630                         result = DNS_R_NCACHENXRRSET;
3631         } else if (type != found->type &&
3632                    type != dns_rdatatype_any &&
3633                    found->type == dns_rdatatype_cname) {
3634                 /*
3635                  * We weren't doing an ANY query and we found a CNAME instead
3636                  * of the type we were looking for, so we need to indicate
3637                  * that result to the caller.
3638                  */
3639                 result = DNS_R_CNAME;
3640         } else {
3641                 /*
3642                  * An ordinary successful query!
3643                  */
3644                 result = ISC_R_SUCCESS;
3645         }
3646
3647         if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
3648             result == DNS_R_NCACHENXRRSET) {
3649                 bind_rdataset(search.rbtdb, node, found, search.now,
3650                               rdataset);
3651                 if (foundsig != NULL)
3652                         bind_rdataset(search.rbtdb, node, foundsig, search.now,
3653                                       sigrdataset);
3654         }
3655
3656  node_exit:
3657         NODE_UNLOCK(lock, locktype);
3658
3659  tree_exit:
3660         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3661
3662         /*
3663          * If we found a zonecut but aren't going to use it, we have to
3664          * let go of it.
3665          */
3666         if (search.need_cleanup) {
3667                 node = search.zonecut;
3668                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3669
3670                 NODE_LOCK(lock, isc_rwlocktype_read);
3671                 decrement_reference(search.rbtdb, node, 0,
3672                                     isc_rwlocktype_read, isc_rwlocktype_none);
3673                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3674         }
3675
3676         dns_rbtnodechain_reset(&search.chain);
3677
3678         return (result);
3679 }
3680
/*
 * Look for an NS rdataset (a zone cut) for 'name' in the cache.
 *
 * The tree is searched for 'name' while holding the tree read lock.  If
 * the search yields only a partial match, or the exactly matching node
 * holds no unexpired, existing NS rdataset, the work is handed over to
 * find_deepest_zonecut().  Otherwise the NS rdataset found at the node
 * (and its RRSIG, if one was seen) is bound to 'rdataset' and
 * 'sigrdataset', and, when 'nodep' is non-NULL, a new reference to the
 * node is stored in '*nodep'.
 *
 * 'now' == 0 means "use the current time".  DNS_DBFIND_NOEXACT in
 * 'options' is translated into DNS_RBTFIND_NOEXACT for the tree search.
 *
 * A DNS_R_DELEGATION result is reported to the caller as ISC_R_SUCCESS;
 * any other result is returned unchanged.
 */
static isc_result_t
cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
                  isc_stdtime_t now, dns_dbnode_t **nodep,
                  dns_name_t *foundname,
                  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
{
        dns_rbtnode_t *node = NULL;
        nodelock_t *lock;
        isc_result_t result;
        rbtdb_search_t search;
        rdatasetheader_t *header, *header_prev, *header_next;
        rdatasetheader_t *found, *foundsig;
        unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
        isc_rwlocktype_t locktype;

        search.rbtdb = (dns_rbtdb_t *)db;

        REQUIRE(VALID_RBTDB(search.rbtdb));

        if (now == 0)
                isc_stdtime_get(&now);

        /*
         * Set up the search context used by the tree search and by
         * find_deepest_zonecut().
         */
        search.rbtversion = NULL;
        search.serial = 1;
        search.options = options;
        search.copy_name = ISC_FALSE;
        search.need_cleanup = ISC_FALSE;
        search.wild = ISC_FALSE;
        search.zonecut = NULL;
        dns_fixedname_init(&search.zonecut_name);
        dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
        search.now = now;

        if ((options & DNS_DBFIND_NOEXACT) != 0)
                rbtoptions |= DNS_RBTFIND_NOEXACT;

        RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);

        /*
         * Search down from the root of the tree.
         */
        result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
                                  &search.chain, rbtoptions, NULL, &search);

        if (result == DNS_R_PARTIALMATCH) {
                /*
                 * This label is also reached by 'goto' from below when the
                 * exactly matching node turns out to have no NS rdataset;
                 * the node lock has already been released at that point.
                 */
        find_ns:
                result = find_deepest_zonecut(&search, node, nodep, foundname,
                                              rdataset, sigrdataset);
                goto tree_exit;
        } else if (result != ISC_R_SUCCESS)
                goto tree_exit;

        /*
         * We now go looking for an NS rdataset at the node.
         */

        lock = &(search.rbtdb->node_locks[node->locknum].lock);
        locktype = isc_rwlocktype_read;
        NODE_LOCK(lock, locktype);

        found = NULL;
        foundsig = NULL;
        header_prev = NULL;
        for (header = node->data; header != NULL; header = header_next) {
                /*
                 * Remember the successor first: 'header' may be unlinked
                 * and freed below.
                 */
                header_next = header->next;
                if (header->ttl <= now) {
                        /*
                         * This rdataset is stale.  If no one else is using the
                         * node, we can clean it up right now, otherwise we
                         * mark it as stale, and the node as dirty, so it will
                         * get cleaned up later.
                         */
                        if ((header->ttl <= now - RBTDB_VIRTUAL) &&
                            (locktype == isc_rwlocktype_write ||
                             NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
                                /*
                                 * We update the node's status only when we
                                 * can get write access.
                                 */
                                locktype = isc_rwlocktype_write;

                                if (dns_rbtnode_refcurrent(node) == 0) {
                                        isc_mem_t *mctx;

                                        /*
                                         * Unlink 'header' from the node's
                                         * list and free it; 'header_prev'
                                         * deliberately stays unchanged.
                                         */
                                        mctx = search.rbtdb->common.mctx;
                                        clean_stale_headers(mctx, header);
                                        if (header_prev != NULL)
                                                header_prev->next =
                                                        header->next;
                                        else
                                                node->data = header->next;
                                        free_rdataset(mctx, header);
                                } else {
                                        header->attributes |=
                                                RDATASET_ATTR_STALE;
                                        node->dirty = 1;
                                        header_prev = header;
                                }
                        } else
                                header_prev = header;
                } else if (EXISTS(header)) {
                        /*
                         * Only the NS rdataset and its signature are of
                         * interest here.
                         */
                        if (header->type == dns_rdatatype_ns) {
                                /*
                                 * This is the NS rdataset we are looking
                                 * for.
                                 */
                                found = header;
                        } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
                                /*
                                 * If we need the NS rdataset, we'll also
                                 * need its signature.
                                 */
                                foundsig = header;
                        }
                        header_prev = header;
                } else
                        header_prev = header;
        }

        if (found == NULL) {
                /*
                 * No NS records here.
                 */
                NODE_UNLOCK(lock, locktype);
                goto find_ns;
        }

        if (nodep != NULL) {
                new_reference(search.rbtdb, node);
                *nodep = node;
        }

        bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
        if (foundsig != NULL)
                bind_rdataset(search.rbtdb, node, foundsig, search.now,
                              sigrdataset);

        /*
         * Release the node lock in whatever mode we ended up holding it.
         */
        NODE_UNLOCK(lock, locktype);

 tree_exit:
        RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);

        INSIST(!search.need_cleanup);

        dns_rbtnodechain_reset(&search.chain);

        /*
         * Callers of this function see a found zone cut as plain success.
         */
        if (result == DNS_R_DELEGATION)
                result = ISC_R_SUCCESS;

        return (result);
}
3837
3838 static void
3839 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
3840         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3841         dns_rbtnode_t *node = (dns_rbtnode_t *)source;
3842         unsigned int refs;
3843
3844         REQUIRE(VALID_RBTDB(rbtdb));
3845         REQUIRE(targetp != NULL && *targetp == NULL);
3846
3847         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
3848         dns_rbtnode_refincrement(node, &refs);
3849         INSIST(refs != 0);
3850         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
3851
3852         *targetp = source;
3853 }
3854
3855 static void
3856 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
3857         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3858         dns_rbtnode_t *node;
3859         isc_boolean_t want_free = ISC_FALSE;
3860         isc_boolean_t inactive = ISC_FALSE;
3861         rbtdb_nodelock_t *nodelock;
3862
3863         REQUIRE(VALID_RBTDB(rbtdb));
3864         REQUIRE(targetp != NULL && *targetp != NULL);
3865
3866         node = (dns_rbtnode_t *)(*targetp);
3867         nodelock = &rbtdb->node_locks[node->locknum];
3868
3869         NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
3870
3871         if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
3872                                 isc_rwlocktype_none)) {
3873                 if (isc_refcount_current(&nodelock->references) == 0 &&
3874                     nodelock->exiting) {
3875                         inactive = ISC_TRUE;
3876                 }
3877         }
3878
3879         NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
3880
3881         *targetp = NULL;
3882
3883         if (inactive) {
3884                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
3885                 rbtdb->active--;
3886                 if (rbtdb->active == 0)
3887                         want_free = ISC_TRUE;
3888                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
3889                 if (want_free) {
3890                         char buf[DNS_NAME_FORMATSIZE];
3891                         if (dns_name_dynamic(&rbtdb->common.origin))
3892                                 dns_name_format(&rbtdb->common.origin, buf,
3893                                                 sizeof(buf));
3894                         else
3895                                 strcpy(buf, "<UNKNOWN>");
3896                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
3897                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
3898                                       "calling free_rbtdb(%s)", buf);
3899                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
3900                 }
3901         }
3902 }
3903
3904 static isc_result_t
3905 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
3906         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3907         dns_rbtnode_t *rbtnode = node;
3908         rdatasetheader_t *header;
3909         isc_boolean_t force_expire = ISC_FALSE;
3910         /*
3911          * These are the category and module used by the cache cleaner.
3912          */
3913         isc_boolean_t log = ISC_FALSE;
3914         isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
3915         isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
3916         int level = ISC_LOG_DEBUG(2);
3917         char printname[DNS_NAME_FORMATSIZE];
3918
3919         REQUIRE(VALID_RBTDB(rbtdb));
3920
3921         /*
3922          * Caller must hold a tree lock.
3923          */
3924
3925         if (now == 0)
3926                 isc_stdtime_get(&now);
3927
3928         if (rbtdb->overmem) {
3929                 isc_uint32_t val;
3930
3931                 isc_random_get(&val);
3932                 /*
3933                  * XXXDCL Could stand to have a better policy, like LRU.
3934                  */
3935                 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
3936
3937                 /*
3938                  * Note that 'log' can be true IFF rbtdb->overmem is also true.
3939                  * rbtdb->ovemem can currently only be true for cache databases
3940                  * -- hence all of the "overmem cache" log strings.
3941                  */
3942                 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
3943                 if (log)
3944                         isc_log_write(dns_lctx, category, module, level,
3945                                       "overmem cache: %s %s",
3946                                       force_expire ? "FORCE" : "check",
3947                                       dns_rbt_formatnodename(rbtnode,
3948                                                            printname,
3949                                                            sizeof(printname)));
3950         }
3951
3952         /*
3953          * We may not need write access, but this code path is not performance
3954          * sensitive, so it should be okay to always lock as a writer.
3955          */
3956         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
3957                   isc_rwlocktype_write);
3958
3959         for (header = rbtnode->data; header != NULL; header = header->next)
3960                 if (header->ttl <= now - RBTDB_VIRTUAL) {
3961                         /*
3962                          * We don't check if refcurrent(rbtnode) == 0 and try
3963                          * to free like we do in cache_find(), because
3964                          * refcurrent(rbtnode) must be non-zero.  This is so
3965                          * because 'node' is an argument to the function.
3966                          */
3967                         header->attributes |= RDATASET_ATTR_STALE;
3968                         rbtnode->dirty = 1;
3969                         if (log)
3970                                 isc_log_write(dns_lctx, category, module,
3971                                               level, "overmem cache: stale %s",
3972                                               printname);
3973                 } else if (force_expire) {
3974                         if (! RETAIN(header)) {
3975                                 header->ttl = 0;
3976                                 header->attributes |= RDATASET_ATTR_STALE;
3977                                 rbtnode->dirty = 1;
3978                         } else if (log) {
3979                                 isc_log_write(dns_lctx, category, module,
3980                                               level, "overmem cache: "
3981                                               "reprieve by RETAIN() %s",
3982                                               printname);
3983                         }
3984                 } else if (rbtdb->overmem && log)
3985                         isc_log_write(dns_lctx, category, module, level,
3986                                       "overmem cache: saved %s", printname);
3987
3988         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
3989                     isc_rwlocktype_write);
3990
3991         return (ISC_R_SUCCESS);
3992 }
3993
3994 static void
3995 overmem(dns_db_t *db, isc_boolean_t overmem) {
3996         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3997
3998         if (IS_CACHE(rbtdb)) {
3999                 rbtdb->overmem = overmem;
4000         }
4001 }
4002
4003 static void
4004 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
4005         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4006         dns_rbtnode_t *rbtnode = node;
4007         isc_boolean_t first;
4008
4009         REQUIRE(VALID_RBTDB(rbtdb));
4010
4011         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4012                   isc_rwlocktype_read);
4013
4014         fprintf(out, "node %p, %u references, locknum = %u\n",
4015                 rbtnode, dns_rbtnode_refcurrent(rbtnode),
4016                 rbtnode->locknum);
4017         if (rbtnode->data != NULL) {
4018                 rdatasetheader_t *current, *top_next;
4019
4020                 for (current = rbtnode->data; current != NULL;
4021                      current = top_next) {
4022                         top_next = current->next;
4023                         first = ISC_TRUE;
4024                         fprintf(out, "\ttype %u", current->type);
4025                         do {
4026                                 if (!first)
4027                                         fprintf(out, "\t");
4028                                 first = ISC_FALSE;
4029                                 fprintf(out,
4030                                         "\tserial = %lu, ttl = %u, "
4031                                         "trust = %u, attributes = %u\n",
4032                                         (unsigned long)current->serial,
4033                                         current->ttl,
4034                                         current->trust,
4035                                         current->attributes);
4036                                 current = current->down;
4037                         } while (current != NULL);
4038                 }
4039         } else
4040                 fprintf(out, "(empty)\n");
4041
4042         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4043                     isc_rwlocktype_read);
4044 }
4045
4046 static isc_result_t
4047 createiterator(dns_db_t *db, isc_boolean_t relative_names,
4048                dns_dbiterator_t **iteratorp)
4049 {
4050         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4051         rbtdb_dbiterator_t *rbtdbiter;
4052
4053         REQUIRE(VALID_RBTDB(rbtdb));
4054
4055         rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
4056         if (rbtdbiter == NULL)
4057                 return (ISC_R_NOMEMORY);
4058
4059         rbtdbiter->common.methods = &dbiterator_methods;
4060         rbtdbiter->common.db = NULL;
4061         dns_db_attach(db, &rbtdbiter->common.db);
4062         rbtdbiter->common.relative_names = relative_names;
4063         rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
4064         rbtdbiter->common.cleaning = ISC_FALSE;
4065         rbtdbiter->paused = ISC_TRUE;
4066         rbtdbiter->tree_locked = isc_rwlocktype_none;
4067         rbtdbiter->result = ISC_R_SUCCESS;
4068         dns_fixedname_init(&rbtdbiter->name);
4069         dns_fixedname_init(&rbtdbiter->origin);
4070         rbtdbiter->node = NULL;
4071         rbtdbiter->delete = 0;
4072         memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
4073         dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
4074
4075         *iteratorp = (dns_dbiterator_t *)rbtdbiter;
4076
4077         return (ISC_R_SUCCESS);
4078 }
4079
4080 static isc_result_t
4081 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
4082                   dns_rdatatype_t type, dns_rdatatype_t covers,
4083                   isc_stdtime_t now, dns_rdataset_t *rdataset,
4084                   dns_rdataset_t *sigrdataset)
4085 {
4086         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4087         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
4088         rdatasetheader_t *header, *header_next, *found, *foundsig;
4089         rbtdb_serial_t serial;
4090         rbtdb_version_t *rbtversion = version;
4091         isc_boolean_t close_version = ISC_FALSE;
4092         rbtdb_rdatatype_t matchtype, sigmatchtype;
4093
4094         REQUIRE(VALID_RBTDB(rbtdb));
4095         REQUIRE(type != dns_rdatatype_any);
4096
4097         if (rbtversion == NULL) {
4098                 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
4099                 close_version = ISC_TRUE;
4100         }
4101         serial = rbtversion->serial;
4102         now = 0;
4103
4104         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4105                   isc_rwlocktype_read);
4106
4107         found = NULL;
4108         foundsig = NULL;
4109         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
4110         if (covers == 0)
4111                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4112         else
4113                 sigmatchtype = 0;
4114
4115         for (header = rbtnode->data; header != NULL; header = header_next) {
4116                 header_next = header->next;
4117                 do {
4118                         if (header->serial <= serial &&
4119                             !IGNORE(header)) {
4120                                 /*
4121                                  * Is this a "this rdataset doesn't
4122                                  * exist" record?
4123                                  */
4124                                 if (NONEXISTENT(header))
4125                                         header = NULL;
4126                                 break;
4127                         } else
4128                                 header = header->down;
4129                 } while (header != NULL);
4130                 if (header != NULL) {
4131                         /*
4132                          * We have an active, extant rdataset.  If it's a
4133                          * type we're looking for, remember it.
4134                          */
4135                         if (header->type == matchtype) {
4136                                 found = header;
4137                                 if (foundsig != NULL)
4138                                         break;
4139                         } else if (header->type == sigmatchtype) {
4140                                 foundsig = header;
4141                                 if (found != NULL)
4142                                         break;
4143                         }
4144                 }
4145         }
4146         if (found != NULL) {
4147                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
4148                 if (foundsig != NULL)
4149                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
4150                                       sigrdataset);
4151         }
4152
4153         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4154                     isc_rwlocktype_read);
4155
4156         if (close_version)
4157                 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
4158                              ISC_FALSE);
4159
4160         if (found == NULL)
4161                 return (ISC_R_NOTFOUND);
4162
4163         return (ISC_R_SUCCESS);
4164 }
4165
4166 static isc_result_t
4167 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
4168                    dns_rdatatype_t type, dns_rdatatype_t covers,
4169                    isc_stdtime_t now, dns_rdataset_t *rdataset,
4170                    dns_rdataset_t *sigrdataset)
4171 {
4172         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4173         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
4174         rdatasetheader_t *header, *header_next, *found, *foundsig;
4175         rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
4176         isc_result_t result;
4177         nodelock_t *lock;
4178         isc_rwlocktype_t locktype;
4179
4180         REQUIRE(VALID_RBTDB(rbtdb));
4181         REQUIRE(type != dns_rdatatype_any);
4182
4183         UNUSED(version);
4184
4185         result = ISC_R_SUCCESS;
4186
4187         if (now == 0)
4188                 isc_stdtime_get(&now);
4189
4190         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
4191         locktype = isc_rwlocktype_read;
4192         NODE_LOCK(lock, locktype);
4193
4194         found = NULL;
4195         foundsig = NULL;
4196         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
4197         negtype = RBTDB_RDATATYPE_VALUE(0, type);
4198         if (covers == 0)
4199                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4200         else
4201                 sigmatchtype = 0;
4202
4203         for (header = rbtnode->data; header != NULL; header = header_next) {
4204                 header_next = header->next;
4205                 if (header->ttl <= now) {
4206                         if ((header->ttl <= now - RBTDB_VIRTUAL) &&
4207                             (locktype == isc_rwlocktype_write ||
4208                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4209                                 /*
4210                                  * We update the node's status only when we
4211                                  * can get write access.
4212                                  */
4213                                 locktype = isc_rwlocktype_write;
4214                                 
4215                                 /*
4216                                  * We don't check if refcurrent(rbtnode) == 0
4217                                  * and try to free like we do in cache_find(),
4218                                  * because refcurrent(rbtnode) must be
4219                                  * non-zero.  This is so because 'node' is an
4220                                  * argument to the function.
4221                                  */
4222                                 header->attributes |= RDATASET_ATTR_STALE;
4223                                 rbtnode->dirty = 1;
4224                         }
4225                 } else if (EXISTS(header)) {
4226                         if (header->type == matchtype)
4227                                 found = header;
4228                         else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4229                                  header->type == negtype)
4230                                 found = header;
4231                         else if (header->type == sigmatchtype)
4232                                 foundsig = header;
4233                 }
4234         }
4235         if (found != NULL) {
4236                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
4237                 if (foundsig != NULL)
4238                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
4239                                       sigrdataset);
4240         }
4241
4242         NODE_UNLOCK(lock, locktype);
4243
4244         if (found == NULL)
4245                 return (ISC_R_NOTFOUND);
4246
4247         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
4248                 /*
4249                  * We found a negative cache entry.
4250                  */
4251                 if (NXDOMAIN(found))
4252                         result = DNS_R_NCACHENXDOMAIN;
4253                 else
4254                         result = DNS_R_NCACHENXRRSET;
4255         }
4256
4257         return (result);
4258 }
4259
4260 static isc_result_t
4261 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
4262              isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
4263 {
4264         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4265         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
4266         rbtdb_version_t *rbtversion = version;
4267         rbtdb_rdatasetiter_t *iterator;
4268         unsigned int refs;
4269
4270         REQUIRE(VALID_RBTDB(rbtdb));
4271
4272         iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
4273         if (iterator == NULL)
4274                 return (ISC_R_NOMEMORY);
4275
4276         if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
4277                 now = 0;
4278                 if (rbtversion == NULL)
4279                         currentversion(db,
4280                                  (dns_dbversion_t **) (void *)(&rbtversion));
4281                 else {
4282                         unsigned int refs;
4283
4284                         isc_refcount_increment(&rbtversion->references,
4285                                                &refs);
4286                         INSIST(refs > 1);
4287                 }
4288         } else {
4289                 if (now == 0)
4290                         isc_stdtime_get(&now);
4291                 rbtversion = NULL;
4292         }
4293
4294         iterator->common.magic = DNS_RDATASETITER_MAGIC;
4295         iterator->common.methods = &rdatasetiter_methods;
4296         iterator->common.db = db;
4297         iterator->common.node = node;
4298         iterator->common.version = (dns_dbversion_t *)rbtversion;
4299         iterator->common.now = now;
4300
4301         NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4302
4303         dns_rbtnode_refincrement(rbtnode, &refs);
4304         INSIST(refs != 0);
4305
4306         iterator->current = NULL;
4307
4308         NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4309
4310         *iteratorp = (dns_rdatasetiter_t *)iterator;
4311
4312         return (ISC_R_SUCCESS);
4313 }
4314
4315 static isc_boolean_t
4316 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
4317         rdatasetheader_t *header, *header_next;
4318         isc_boolean_t cname, other_data;
4319         dns_rdatatype_t rdtype;
4320
4321         /*
4322          * The caller must hold the node lock.
4323          */
4324
4325         /*
4326          * Look for CNAME and "other data" rdatasets active in our version.
4327          */
4328         cname = ISC_FALSE;
4329         other_data = ISC_FALSE;
4330         for (header = node->data; header != NULL; header = header_next) {
4331                 header_next = header->next;
4332                 if (header->type == dns_rdatatype_cname) {
4333                         /*
4334                          * Look for an active extant CNAME.
4335                          */
4336                         do {
4337                                 if (header->serial <= serial &&
4338                                     !IGNORE(header)) {
4339                                         /*
4340                                          * Is this a "this rdataset doesn't
4341                                          * exist" record?
4342                                          */
4343                                         if (NONEXISTENT(header))
4344                                                 header = NULL;
4345                                         break;
4346                                 } else
4347                                         header = header->down;
4348                         } while (header != NULL);
4349                         if (header != NULL)
4350                                 cname = ISC_TRUE;
4351                 } else {
4352                         /*
4353                          * Look for active extant "other data".
4354                          *
4355                          * "Other data" is any rdataset whose type is not
4356                          * KEY, RRSIG KEY, NSEC, RRSIG NSEC or RRSIG CNAME.
4357                          */
4358                         rdtype = RBTDB_RDATATYPE_BASE(header->type);
4359                         if (rdtype == dns_rdatatype_rrsig ||
4360                             rdtype == dns_rdatatype_sig)
4361                                 rdtype = RBTDB_RDATATYPE_EXT(header->type);
4362                         if (rdtype != dns_rdatatype_nsec &&
4363                             rdtype != dns_rdatatype_key &&
4364                             rdtype != dns_rdatatype_cname) {
4365                                 /*
4366                                  * We've found a type that isn't
4367                                  * NSEC, KEY, CNAME, or one of their
4368                                  * signatures.  Is it active and extant?
4369                                  */
4370                                 do {
4371                                         if (header->serial <= serial &&
4372                                             !IGNORE(header)) {
4373                                                 /*
4374                                                  * Is this a "this rdataset
4375                                                  * doesn't exist" record?
4376                                                  */
4377                                                 if (NONEXISTENT(header))
4378                                                         header = NULL;
4379                                                 break;
4380                                         } else
4381                                                 header = header->down;
4382                                 } while (header != NULL);
4383                                 if (header != NULL)
4384                                         other_data = ISC_TRUE;
4385                         }
4386                 }
4387         }
4388
4389         if (cname && other_data)
4390                 return (ISC_TRUE);
4391
4392         return (ISC_FALSE);
4393 }
4394
/*
 * add() -- link 'newheader' into the rdataset list of 'rbtnode'.
 *
 * Arguments:
 *   rbtversion     Version being modified, or NULL.  The NULL case is the
 *                  one the comments below call the cache case: trust and
 *                  TTL ('now') comparisons and negative-cache handling
 *                  apply only then.
 *   newheader      Header to add.  Ownership passes to this function: it
 *                  is either linked into the node or released with
 *                  free_rdataset() before returning.
 *   options        DNS_DBADD_MERGE (requires rbtversion != NULL) asks for
 *                  the new data to be merged with an existing rdataset of
 *                  the same type; DNS_DBADD_EXACT and DNS_DBADD_EXACTTTL
 *                  tighten that merge; DNS_DBADD_FORCE makes the new data
 *                  compare as dns_trust_ultimate.
 *   loading        When true no 'changed' record is made and a replaced
 *                  header is freed immediately instead of being kept on
 *                  the 'down' chain.
 *   addedrdataset  If non-NULL, bound to the header that ends up
 *                  representing the type (the new one, or the existing
 *                  one when the existing data is retained).
 *
 * Returns:
 *   ISC_R_SUCCESS         the data was added, or equivalent NS/A/AAAA
 *                         cache data was already present and was kept;
 *   DNS_R_UNCHANGED       nothing was changed (deleting something already
 *                         absent, or the existing cache data is more
 *                         trusted);
 *   ISC_R_NOMEMORY        add_changed() failed;
 *   DNS_R_NOTEXACT        DNS_DBADD_EXACTTTL was set and the TTLs differ;
 *   other                 whatever dns_rdataslab_merge() returned;
 *   DNS_R_CNAMEANDOTHER   the node now holds CNAME and other data.  Note
 *                         that newheader has already been linked into the
 *                         node when this is returned.
 */
4395 static isc_result_t
4396 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
4397     rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
4398     dns_rdataset_t *addedrdataset, isc_stdtime_t now)
4399 {
4400         rbtdb_changed_t *changed = NULL;
4401         rdatasetheader_t *topheader, *topheader_prev, *header;
4402         unsigned char *merged;
4403         isc_result_t result;
4404         isc_boolean_t header_nx;
4405         isc_boolean_t newheader_nx;
4406         isc_boolean_t merge;
4407         dns_rdatatype_t rdtype, covers;
4408         rbtdb_rdatatype_t negtype;
4409         dns_trust_t trust;
4410
4411         /*
4412          * Add an rdatasetheader_t to a node.
4413          */
4414
4415         /*
4416          * Caller must be holding the node lock.
4417          */
4418
4419         if ((options & DNS_DBADD_MERGE) != 0) {
4420                 REQUIRE(rbtversion != NULL);
4421                 merge = ISC_TRUE;
4422         } else
4423                 merge = ISC_FALSE;
4424
4425         if ((options & DNS_DBADD_FORCE) != 0)
4426                 trust = dns_trust_ultimate;
4427         else
4428                 trust = newheader->trust;
4429
4430         if (rbtversion != NULL && !loading) {
4431                 /*
4432                  * We always add a changed record, even if no changes end up
4433                  * being made to this node, because it's harmless and
4434                  * simplifies the code.
4435                  */
4436                 changed = add_changed(rbtdb, rbtversion, rbtnode);
4437                 if (changed == NULL) {
4438                         free_rdataset(rbtdb->common.mctx, newheader);
4439                         return (ISC_R_NOMEMORY);
4440                 }
4441         }
4442
4443         newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
4444         topheader_prev = NULL;
4445
4446         negtype = 0;
4447         if (rbtversion == NULL && !newheader_nx) {
4448                 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
4449                 if (rdtype == 0) {
4450                         /*
4451                          * We're adding a negative cache entry.
4452                          */
4453                         covers = RBTDB_RDATATYPE_EXT(newheader->type);
4454                         if (covers == dns_rdatatype_any) {
4455                                 /*
4456                                  * We're adding a negative cache entry
4457                                  * which covers all types (NXDOMAIN,
4458                                  * NODATA(QTYPE=ANY)).
4459                                  *
4460                                  * We make all other data stale so that the
4461                                  * only rdataset that can be found at this
4462                                  * node is the negative cache entry.
4463                                  */
4464                                 for (topheader = rbtnode->data;
4465                                      topheader != NULL;
4466                                      topheader = topheader->next) {
4467                                         topheader->ttl = 0;
4468                                         topheader->attributes |=
4469                                                 RDATASET_ATTR_STALE;
4470                                 }
4471                                 rbtnode->dirty = 1;
                                /*
                                 * The loop above ran off the end of the
                                 * list, so topheader is NULL here (and
                                 * topheader_prev is still NULL): the code
                                 * at find_header will insert newheader at
                                 * the head of the node's list.
                                 */
4472                                 goto find_header;
4473                         }
4474                         negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
4475                 } else {
4476                         /*
4477                          * We're adding something that isn't a
4478                          * negative cache entry.  Look for an extant
4479                          * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
4480                          * cache entry.
4481                          */
4482                         for (topheader = rbtnode->data;
4483                              topheader != NULL;
4484                              topheader = topheader->next) {
4485                                 if (topheader->type == 
4486                                     RBTDB_RDATATYPE_NCACHEANY)
4487                                         break;
4488                         }
4489                         if (topheader != NULL && EXISTS(topheader) &&
4490                             topheader->ttl > now) {
4491                                 /*
4492                                  * Found one.
4493                                  */
4494                                 if (trust < topheader->trust) {
4495                                         /*
4496                                          * The NXDOMAIN/NODATA(QTYPE=ANY)
4497                                          * is more trusted.
4498                                          */
4499                                         
4500                                         free_rdataset(rbtdb->common.mctx,
4501                                                       newheader);
4502                                         if (addedrdataset != NULL)
4503                                                 bind_rdataset(rbtdb, rbtnode,
4504                                                               topheader, now,
4505                                                               addedrdataset);
4506                                         return (DNS_R_UNCHANGED);
4507                                 }
4508                                 /*
4509                                  * The new rdataset is better.  Expire the
4510                                  * NXDOMAIN/NODATA(QTYPE=ANY).
4511                                  */
4512                                 topheader->ttl = 0;
4513                                 topheader->attributes |= RDATASET_ATTR_STALE;
4514                                 rbtnode->dirty = 1;
                                /*
                                 * Clear topheader so that the code at
                                 * find_header takes its "no rdatasets of
                                 * the given type" path and places
                                 * newheader at the head of the list.
                                 */
4515                                 topheader = NULL;
4516                                 goto find_header;
4517                         }
4518                         negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
4519                 }
4520         }
4521
        /*
         * Find the top-of-chain header whose type equals newheader's type
         * or negtype, remembering its predecessor in the 'next' list for
         * the splice further down.
         */
4522         for (topheader = rbtnode->data;
4523              topheader != NULL;
4524              topheader = topheader->next) {
4525                 if (topheader->type == newheader->type ||
4526                     topheader->type == negtype)
4527                         break;
4528                 topheader_prev = topheader;
4529         }
4530
4531  find_header:
4532         /*
4533          * If header isn't NULL, we've found the right type.  There may be
4534          * IGNORE rdatasets between the top of the chain and the first real
4535          * data.  We skip over them.
4536          */
4537         header = topheader;
4538         while (header != NULL && IGNORE(header))
4539                 header = header->down;
4540         if (header != NULL) {
4541                 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
4542
4543                 /*
4544                  * Deleting an already non-existent rdataset has no effect.
4545                  */
4546                 if (header_nx && newheader_nx) {
4547                         free_rdataset(rbtdb->common.mctx, newheader);
4548                         return (DNS_R_UNCHANGED);
4549                 }
4550
4551                 /*
4552                  * Trying to add an rdataset with lower trust to a cache DB
4553                  * has no effect, provided that the cache data isn't stale.
4554                  */
4555                 if (rbtversion == NULL && trust < header->trust &&
4556                     (header->ttl > now || header_nx)) {
4557                         free_rdataset(rbtdb->common.mctx, newheader);
4558                         if (addedrdataset != NULL)
4559                                 bind_rdataset(rbtdb, rbtnode, header, now,
4560                                               addedrdataset);
4561                         return (DNS_R_UNCHANGED);
4562                 }
4563
4564                 /*
4565                  * Don't merge if a nonexistent rdataset is involved.
4566                  */
4567                 if (merge && (header_nx || newheader_nx))
4568                         merge = ISC_FALSE;
4569
4570                 /*
4571                  * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
4572                  * that is the union of 'newheader' and 'header'.
4573                  */
4574                 if (merge) {
4575                         unsigned int flags = 0;
4576                         INSIST(rbtversion->serial >= header->serial);
4577                         merged = NULL;
4578                         result = ISC_R_SUCCESS;
4579                         
4580                         if ((options & DNS_DBADD_EXACT) != 0)
4581                                 flags |= DNS_RDATASLAB_EXACT;
4582                         if ((options & DNS_DBADD_EXACTTTL) != 0 &&
4583                              newheader->ttl != header->ttl)
4584                                         result = DNS_R_NOTEXACT;
4585                         else if (newheader->ttl != header->ttl)
4586                                 flags |= DNS_RDATASLAB_FORCE;
4587                         if (result == ISC_R_SUCCESS)
4588                                 result = dns_rdataslab_merge(
4589                                              (unsigned char *)header,
4590                                              (unsigned char *)newheader,
4591                                              (unsigned int)(sizeof(*newheader)),
4592                                              rbtdb->common.mctx,
4593                                              rbtdb->common.rdclass,
4594                                              (dns_rdatatype_t)header->type,
4595                                              flags, &merged);
4596                         if (result == ISC_R_SUCCESS) {
4597                                 /*
4598                                  * If 'header' has the same serial number as
4599                                  * we do, we could clean it up now if we knew
4600                                  * that our caller had no references to it.
4601                                  * We don't know this, however, so we leave it
4602                                  * alone.  It will get cleaned up when
4603                                  * clean_zone_node() runs.
4604                                  */
4605                                 free_rdataset(rbtdb->common.mctx, newheader);
4606                                 newheader = (rdatasetheader_t *)merged;
4607                         } else {
4608                                 free_rdataset(rbtdb->common.mctx, newheader);
4609                                 return (result);
4610                         }
4611                 }
4612                 /*
4613                  * Don't replace existing NS, A and AAAA RRsets
4614                  * in the cache if they already exist.  This
4615                  * prevents named being locked to old servers.
4616                  * Don't lower trust of existing record if the
4617                  * update is forced.
4618                  */
4619                 if (IS_CACHE(rbtdb) && header->ttl > now &&
4620                     header->type == dns_rdatatype_ns &&
4621                     !header_nx && !newheader_nx &&
4622                     header->trust >= newheader->trust &&
4623                     dns_rdataslab_equalx((unsigned char *)header,
4624                                          (unsigned char *)newheader,
4625                                          (unsigned int)(sizeof(*newheader)),
4626                                          rbtdb->common.rdclass,
4627                                          (dns_rdatatype_t)header->type)) {
4628                         /*
4629                          * Honour the new ttl if it is less than the
4630                          * older one.
4631                          */
4632                         if (header->ttl > newheader->ttl)
4633                                 header->ttl = newheader->ttl;
                        /*
                         * Move the noqname data across to the retained
                         * header (when it has none) so that freeing
                         * newheader below does not take it along.
                         */
4634                         if (header->noqname == NULL &&
4635                             newheader->noqname != NULL) {
4636                                 header->noqname = newheader->noqname;
4637                                 newheader->noqname = NULL;
4638                         }
4639                         free_rdataset(rbtdb->common.mctx, newheader);
4640                         if (addedrdataset != NULL)
4641                                 bind_rdataset(rbtdb, rbtnode, header, now,
4642                                               addedrdataset);
4643                         return (ISC_R_SUCCESS);
4644                 }
                /*
                 * Same treatment for A and AAAA, but compared with
                 * dns_rdataslab_equal() instead of dns_rdataslab_equalx().
                 */
4645                 if (IS_CACHE(rbtdb) && header->ttl > now &&
4646                     (header->type == dns_rdatatype_a ||
4647                      header->type == dns_rdatatype_aaaa) &&
4648                     !header_nx && !newheader_nx &&
4649                     header->trust >= newheader->trust &&
4650                     dns_rdataslab_equal((unsigned char *)header,
4651                                         (unsigned char *)newheader,
4652                                         (unsigned int)(sizeof(*newheader)))) {
4653                         /*
4654                          * Honour the new ttl if it is less than the
4655                          * older one.
4656                          */
4657                         if (header->ttl > newheader->ttl)
4658                                 header->ttl = newheader->ttl;
4659                         if (header->noqname == NULL &&
4660                             newheader->noqname != NULL) {
4661                                 header->noqname = newheader->noqname;
4662                                 newheader->noqname = NULL;
4663                         }
4664                         free_rdataset(rbtdb->common.mctx, newheader);
4665                         if (addedrdataset != NULL)
4666                                 bind_rdataset(rbtdb, rbtnode, header, now,
4667                                               addedrdataset);
4668                         return (ISC_R_SUCCESS);
4669                 }
4670                 INSIST(rbtversion == NULL ||
4671                        rbtversion->serial >= topheader->serial);
                /*
                 * Put newheader into the 'next' list in the position
                 * topheader occupied.
                 */
4672                 if (topheader_prev != NULL)
4673                         topheader_prev->next = newheader;
4674                 else
4675                         rbtnode->data = newheader;
4676                 newheader->next = topheader->next;
4677                 if (loading) {
4678                         /*
4679                          * There are no other references to 'header' when
4680                          * loading, so we MAY clean up 'header' now.
4681                          * Since we don't generate changed records when
4682                          * loading, we MUST clean up 'header' now.
4683                          */
4684                         newheader->down = NULL;
4685                         free_rdataset(rbtdb->common.mctx, header);
4686                 } else {
                        /*
                         * Keep the superseded chain reachable through
                         * newheader->down, and point the old top header's
                         * 'next' at its replacement.
                         */
4687                         newheader->down = topheader;
4688                         topheader->next = newheader;
4689                         rbtnode->dirty = 1;
4690                         if (changed != NULL)
4691                                 changed->dirty = ISC_TRUE;
                        /*
                         * Without a version the replaced header is
                         * expired and marked stale.
                         */
4692                         if (rbtversion == NULL) {
4693                                 header->ttl = 0;
4694                                 header->attributes |= RDATASET_ATTR_STALE;
4695                         }
4696                 }
4697         } else {
4698                 /*
4699                  * No non-IGNORED rdatasets of the given type exist at
4700                  * this node.
4701                  */
4702
4703                 /*
4704                  * If we're trying to delete the type, don't bother.
4705                  */
4706                 if (newheader_nx) {
4707                         free_rdataset(rbtdb->common.mctx, newheader);
4708                         return (DNS_R_UNCHANGED);
4709                 }
4710
4711                 if (topheader != NULL) {
4712                         /*
4713                          * We have a list of rdatasets of the given type,
4714                          * but they're all marked IGNORE.  We simply insert
4715                          * the new rdataset at the head of the list.
4716                          *
4717                          * Ignored rdatasets cannot occur during loading, so
4718                          * we INSIST on it.
4719                          */
4720                         INSIST(!loading);
4721                         INSIST(rbtversion == NULL ||
4722                                rbtversion->serial >= topheader->serial);
4723                         if (topheader_prev != NULL)
4724                                 topheader_prev->next = newheader;
4725                         else
4726                                 rbtnode->data = newheader;
4727                         newheader->next = topheader->next;
4728                         newheader->down = topheader;
4729                         topheader->next = newheader;
4730                         rbtnode->dirty = 1;
4731                         if (changed != NULL)
4732                                 changed->dirty = ISC_TRUE;
4733                 } else {
4734                         /*
4735                          * No rdatasets of the given type exist at the node.
4736                          */
4737                         newheader->next = rbtnode->data;
4738                         newheader->down = NULL;
4739                         rbtnode->data = newheader;
4740                 }
4741         }
4742
4743         /*
4744          * Check if the node now contains CNAME and other data.
4745          */
4746         if (rbtversion != NULL &&
4747             cname_and_other_data(rbtnode, rbtversion->serial))
4748                 return (DNS_R_CNAMEANDOTHER);
4749
4750         if (addedrdataset != NULL)
4751                 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
4752
4753         return (ISC_R_SUCCESS);
4754 }
4755
4756 static inline isc_boolean_t
4757 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
4758                 rbtdb_rdatatype_t type)
4759 {
4760         if (IS_CACHE(rbtdb)) {
4761                 if (type == dns_rdatatype_dname)
4762                         return (ISC_TRUE);
4763                 else
4764                         return (ISC_FALSE);
4765         } else if (type == dns_rdatatype_dname ||
4766                    (type == dns_rdatatype_ns &&
4767                     (node != rbtdb->origin_node || IS_STUB(rbtdb))))
4768                 return (ISC_TRUE);
4769         return (ISC_FALSE);
4770 }
4771
4772 static inline isc_result_t
4773 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
4774            dns_rdataset_t *rdataset)
4775 {
4776         struct noqname *noqname;
4777         isc_mem_t *mctx = rbtdb->common.mctx;
4778         dns_name_t name;
4779         dns_rdataset_t nsec, nsecsig;
4780         isc_result_t result;
4781         isc_region_t r;
4782
4783         dns_name_init(&name, NULL);
4784         dns_rdataset_init(&nsec);
4785         dns_rdataset_init(&nsecsig);
4786
4787         result = dns_rdataset_getnoqname(rdataset, &name, &nsec, &nsecsig);
4788         RUNTIME_CHECK(result == ISC_R_SUCCESS);
4789
4790         noqname = isc_mem_get(mctx, sizeof(*noqname));
4791         if (noqname == NULL) {
4792                 result = ISC_R_NOMEMORY;
4793                 goto cleanup;
4794         }
4795         dns_name_init(&noqname->name, NULL);
4796         noqname->nsec = NULL;
4797         noqname->nsecsig = NULL;
4798         result = dns_name_dup(&name, mctx, &noqname->name);
4799         if (result != ISC_R_SUCCESS)
4800                 goto cleanup;
4801         result = dns_rdataslab_fromrdataset(&nsec, mctx, &r, 0);
4802         if (result != ISC_R_SUCCESS)
4803                 goto cleanup;
4804         noqname->nsec = r.base;
4805         result = dns_rdataslab_fromrdataset(&nsecsig, mctx, &r, 0);
4806         if (result != ISC_R_SUCCESS)
4807                 goto cleanup;
4808         noqname->nsecsig = r.base;
4809         dns_rdataset_disassociate(&nsec);
4810         dns_rdataset_disassociate(&nsecsig);
4811         newheader->noqname = noqname;
4812         return (ISC_R_SUCCESS);
4813
4814 cleanup:
4815         dns_rdataset_disassociate(&nsec);
4816         dns_rdataset_disassociate(&nsecsig);
4817         free_noqname(mctx, &noqname);
4818         return(result);
4819 }
4820
4821 static isc_result_t
4822 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
4823             isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
4824             dns_rdataset_t *addedrdataset)
4825 {
4826         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4827         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
4828         rbtdb_version_t *rbtversion = version;
4829         isc_region_t region;
4830         rdatasetheader_t *newheader;
4831         isc_result_t result;
4832         isc_boolean_t delegating;
4833
4834         REQUIRE(VALID_RBTDB(rbtdb));
4835
4836         if (rbtversion == NULL) {
4837                 if (now == 0)
4838                         isc_stdtime_get(&now);
4839         } else
4840                 now = 0;
4841
4842         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
4843                                             &region,
4844                                             sizeof(rdatasetheader_t));
4845         if (result != ISC_R_SUCCESS)
4846                 return (result);
4847
4848         newheader = (rdatasetheader_t *)region.base;
4849         newheader->ttl = rdataset->ttl + now;
4850         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
4851                                                 rdataset->covers);
4852         newheader->attributes = 0;
4853         newheader->noqname = NULL;
4854         newheader->count = init_count++;
4855         newheader->trust = rdataset->trust;
4856         newheader->additional_auth = NULL;
4857         newheader->additional_glue = NULL;
4858         if (rbtversion != NULL) {
4859                 newheader->serial = rbtversion->serial;
4860                 now = 0;
4861         } else {
4862                 newheader->serial = 1;
4863                 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
4864                         newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
4865                 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
4866                         result = addnoqname(rbtdb, newheader, rdataset);
4867                         if (result != ISC_R_SUCCESS) {
4868                                 free_rdataset(rbtdb->common.mctx, newheader);
4869                                 return (result);
4870                         }
4871                 }
4872         }
4873
4874         /*
4875          * If we're adding a delegation type (e.g. NS or DNAME for a zone,
4876          * just DNAME for the cache), then we need to set the callback bit
4877          * on the node, and to do that we must be holding an exclusive lock
4878          * on the tree.
4879          */
4880         if (delegating_type(rbtdb, rbtnode, rdataset->type)) {
4881                 delegating = ISC_TRUE;
4882                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
4883         } else
4884                 delegating = ISC_FALSE;
4885
4886         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4887                   isc_rwlocktype_write);
4888
4889         result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
4890                      addedrdataset, now);
4891         if (result == ISC_R_SUCCESS && delegating)
4892                 rbtnode->find_callback = 1;
4893
4894         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4895                     isc_rwlocktype_write);
4896
4897         if (delegating)
4898                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
4899
4900         /*
4901          * Update the zone's secure status.  If version is non-NULL
4902          * this is defered until closeversion() is called.
4903          */
4904         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
4905                 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
4906
4907         return (result);
4908 }
4909
4910 static isc_result_t
4911 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
4912                  dns_rdataset_t *rdataset, unsigned int options,
4913                  dns_rdataset_t *newrdataset)
4914 {
4915         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4916         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
4917         rbtdb_version_t *rbtversion = version;
4918         rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
4919         unsigned char *subresult;
4920         isc_region_t region;
4921         isc_result_t result;
4922         rbtdb_changed_t *changed;
4923
4924         REQUIRE(VALID_RBTDB(rbtdb));
4925
4926         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
4927                                             &region,
4928                                             sizeof(rdatasetheader_t));
4929         if (result != ISC_R_SUCCESS)
4930                 return (result);
4931         newheader = (rdatasetheader_t *)region.base;
4932         newheader->ttl = rdataset->ttl;
4933         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
4934                                                 rdataset->covers);
4935         newheader->attributes = 0;
4936         newheader->serial = rbtversion->serial;
4937         newheader->trust = 0;
4938         newheader->noqname = NULL;
4939         newheader->count = init_count++;
4940         newheader->additional_auth = NULL;
4941         newheader->additional_glue = NULL;
4942
4943         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4944                   isc_rwlocktype_write);
4945
4946         changed = add_changed(rbtdb, rbtversion, rbtnode);
4947         if (changed == NULL) {
4948                 free_rdataset(rbtdb->common.mctx, newheader);
4949                 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4950                             isc_rwlocktype_write);
4951                 return (ISC_R_NOMEMORY);
4952         }
4953
4954         topheader_prev = NULL;
4955         for (topheader = rbtnode->data;
4956              topheader != NULL;
4957              topheader = topheader->next) {
4958                 if (topheader->type == newheader->type)
4959                         break;
4960                 topheader_prev = topheader;
4961         }
4962         /*
4963          * If header isn't NULL, we've found the right type.  There may be
4964          * IGNORE rdatasets between the top of the chain and the first real
4965          * data.  We skip over them.
4966          */
4967         header = topheader;
4968         while (header != NULL && IGNORE(header))
4969                 header = header->down;
4970         if (header != NULL && EXISTS(header)) {
4971                 unsigned int flags = 0;
4972                 subresult = NULL;
4973                 result = ISC_R_SUCCESS;
4974                 if ((options & DNS_DBSUB_EXACT) != 0) {
4975                         flags |= DNS_RDATASLAB_EXACT;
4976                         if (newheader->ttl != header->ttl)
4977                                 result = DNS_R_NOTEXACT;
4978                 }
4979                 if (result == ISC_R_SUCCESS)
4980                         result = dns_rdataslab_subtract(
4981                                         (unsigned char *)header,
4982                                         (unsigned char *)newheader,
4983                                         (unsigned int)(sizeof(*newheader)),
4984                                         rbtdb->common.mctx,
4985                                         rbtdb->common.rdclass,
4986                                         (dns_rdatatype_t)header->type,
4987                                         flags, &subresult);
4988                 if (result == ISC_R_SUCCESS) {
4989                         free_rdataset(rbtdb->common.mctx, newheader);
4990                         newheader = (rdatasetheader_t *)subresult;
4991                         /*
4992                          * We have to set the serial since the rdataslab
4993                          * subtraction routine copies the reserved portion of
4994                          * header, not newheader.
4995                          */
4996                         newheader->serial = rbtversion->serial;
4997                         /*
4998                          * XXXJT: dns_rdataslab_subtract() copied the pointers
4999                          * to additional info.  We need to clear these fields
5000                          * to avoid having duplicated references.
5001                          */
5002                         newheader->additional_auth = NULL;
5003                         newheader->additional_glue = NULL;
5004                 } else if (result == DNS_R_NXRRSET) {
5005                         /*
5006                          * This subtraction would remove all of the rdata;
5007                          * add a nonexistent header instead.
5008                          */
5009                         free_rdataset(rbtdb->common.mctx, newheader);
5010                         newheader = isc_mem_get(rbtdb->common.mctx,
5011                                                 sizeof(*newheader));
5012                         if (newheader == NULL) {
5013                                 result = ISC_R_NOMEMORY;
5014                                 goto unlock;
5015                         }
5016                         newheader->ttl = 0;
5017                         newheader->type = topheader->type;
5018                         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
5019                         newheader->trust = 0;
5020                         newheader->serial = rbtversion->serial;
5021                         newheader->noqname = NULL;
5022                         newheader->count = 0;
5023                         newheader->additional_auth = NULL;
5024                         newheader->additional_glue = NULL;
5025                 } else {
5026                         free_rdataset(rbtdb->common.mctx, newheader);
5027                         goto unlock;
5028                 }
5029
5030                 /*
5031                  * If we're here, we want to link newheader in front of
5032                  * topheader.
5033                  */
5034                 INSIST(rbtversion->serial >= topheader->serial);
5035                 if (topheader_prev != NULL)
5036                         topheader_prev->next = newheader;
5037                 else
5038                         rbtnode->data = newheader;
5039                 newheader->next = topheader->next;
5040                 newheader->down = topheader;
5041                 topheader->next = newheader;
5042                 rbtnode->dirty = 1;
5043                 changed->dirty = ISC_TRUE;
5044         } else {
5045                 /*
5046                  * The rdataset doesn't exist, so we don't need to do anything
5047                  * to satisfy the deletion request.
5048                  */
5049                 free_rdataset(rbtdb->common.mctx, newheader);
5050                 if ((options & DNS_DBSUB_EXACT) != 0)
5051                         result = DNS_R_NOTEXACT;
5052                 else
5053                         result = DNS_R_UNCHANGED;                       
5054         }
5055
5056         if (result == ISC_R_SUCCESS && newrdataset != NULL)
5057                 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
5058
5059  unlock:
5060         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5061                     isc_rwlocktype_write);
5062
5063         /*
5064          * Update the zone's secure status.  If version is non-NULL
5065          * this is defered until closeversion() is called.
5066          */
5067         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
5068                 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
5069
5070         return (result);
5071 }
5072
5073 static isc_result_t
5074 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5075                dns_rdatatype_t type, dns_rdatatype_t covers)
5076 {
5077         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5078         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5079         rbtdb_version_t *rbtversion = version;
5080         isc_result_t result;
5081         rdatasetheader_t *newheader;
5082
5083         REQUIRE(VALID_RBTDB(rbtdb));
5084
5085         if (type == dns_rdatatype_any)
5086                 return (ISC_R_NOTIMPLEMENTED);
5087         if (type == dns_rdatatype_rrsig && covers == 0)
5088                 return (ISC_R_NOTIMPLEMENTED);
5089
5090         newheader = isc_mem_get(rbtdb->common.mctx, sizeof(*newheader));
5091         if (newheader == NULL)
5092                 return (ISC_R_NOMEMORY);
5093         newheader->ttl = 0;
5094         newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
5095         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
5096         newheader->trust = 0;
5097         newheader->noqname = NULL;
5098         newheader->additional_auth = NULL;
5099         newheader->additional_glue = NULL;
5100         if (rbtversion != NULL)
5101                 newheader->serial = rbtversion->serial;
5102         else
5103                 newheader->serial = 0;
5104         newheader->count = 0;
5105
5106         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5107                   isc_rwlocktype_write);
5108
5109         result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
5110                      ISC_FALSE, NULL, 0);
5111
5112         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5113                     isc_rwlocktype_write);
5114
5115         /*
5116          * Update the zone's secure status.  If version is non-NULL
5117          * this is defered until closeversion() is called.
5118          */
5119         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
5120                 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
5121
5122         return (result);
5123 }
5124
5125 static isc_result_t
5126 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
5127         rbtdb_load_t *loadctx = arg;
5128         dns_rbtdb_t *rbtdb = loadctx->rbtdb;
5129         dns_rbtnode_t *node;
5130         isc_result_t result;
5131         isc_region_t region;
5132         rdatasetheader_t *newheader;
5133
5134         /*
5135          * This routine does no node locking.  See comments in
5136          * 'load' below for more information on loading and
5137          * locking.
5138          */
5139
5140
5141         /*
5142          * SOA records are only allowed at top of zone.
5143          */
5144         if (rdataset->type == dns_rdatatype_soa &&
5145             !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
5146                 return (DNS_R_NOTZONETOP);
5147
5148         add_empty_wildcards(rbtdb, name);
5149
5150         if (dns_name_iswildcard(name)) {
5151                 /*
5152                  * NS record owners cannot legally be wild cards.
5153                  */
5154                 if (rdataset->type == dns_rdatatype_ns)
5155                         return (DNS_R_INVALIDNS);
5156                 result = add_wildcard_magic(rbtdb, name);
5157                 if (result != ISC_R_SUCCESS)
5158                         return (result);
5159         }
5160
5161         node = NULL;
5162         result = dns_rbt_addnode(rbtdb->tree, name, &node);
5163         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
5164                 return (result);
5165         if (result != ISC_R_EXISTS) {
5166                 dns_name_t foundname;
5167                 dns_name_init(&foundname, NULL);
5168                 dns_rbt_namefromnode(node, &foundname);
5169 #ifdef DNS_RBT_USEHASH
5170                 node->locknum = node->hashval % rbtdb->node_lock_count;
5171 #else
5172                 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
5173                         rbtdb->node_lock_count;
5174 #endif
5175         }
5176
5177         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
5178                                             &region,
5179                                             sizeof(rdatasetheader_t));
5180         if (result != ISC_R_SUCCESS)
5181                 return (result);
5182         newheader = (rdatasetheader_t *)region.base;
5183         newheader->ttl = rdataset->ttl + loadctx->now; /* XXX overflow check */
5184         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
5185                                                 rdataset->covers);
5186         newheader->attributes = 0;
5187         newheader->trust = rdataset->trust;
5188         newheader->serial = 1;
5189         newheader->noqname = NULL;
5190         newheader->count = init_count++;
5191         newheader->additional_auth = NULL;
5192         newheader->additional_glue = NULL;
5193
5194         result = add(rbtdb, node, rbtdb->current_version, newheader,
5195                      DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
5196         if (result == ISC_R_SUCCESS &&
5197             delegating_type(rbtdb, node, rdataset->type))
5198                 node->find_callback = 1;
5199         else if (result == DNS_R_UNCHANGED)
5200                 result = ISC_R_SUCCESS;
5201
5202         return (result);
5203 }
5204
5205 static isc_result_t
5206 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
5207         rbtdb_load_t *loadctx;
5208         dns_rbtdb_t *rbtdb;
5209
5210         rbtdb = (dns_rbtdb_t *)db;
5211
5212         REQUIRE(VALID_RBTDB(rbtdb));
5213
5214         loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
5215         if (loadctx == NULL)
5216                 return (ISC_R_NOMEMORY);
5217
5218         loadctx->rbtdb = rbtdb;
5219         if (IS_CACHE(rbtdb))
5220                 isc_stdtime_get(&loadctx->now);
5221         else
5222                 loadctx->now = 0;
5223
5224         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5225
5226         REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
5227                 == 0);
5228         rbtdb->attributes |= RBTDB_ATTR_LOADING;
5229
5230         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
5231
5232         *addp = loading_addrdataset;
5233         *dbloadp = loadctx;
5234
5235         return (ISC_R_SUCCESS);
5236 }
5237
5238 static isc_result_t
5239 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
5240         rbtdb_load_t *loadctx;
5241         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5242
5243         REQUIRE(VALID_RBTDB(rbtdb));
5244         REQUIRE(dbloadp != NULL);
5245         loadctx = *dbloadp;
5246         REQUIRE(loadctx->rbtdb == rbtdb);
5247
5248         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5249
5250         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
5251         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
5252
5253         rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
5254         rbtdb->attributes |= RBTDB_ATTR_LOADED;
5255
5256         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
5257
5258         /*
5259          * If there's a KEY rdataset at the zone origin containing a
5260          * zone key, we consider the zone secure.
5261          */
5262         if (! IS_CACHE(rbtdb))
5263                 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
5264
5265         *dbloadp = NULL;
5266
5267         isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
5268
5269         return (ISC_R_SUCCESS);
5270 }
5271
5272 static isc_result_t
5273 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
5274      dns_masterformat_t masterformat) {
5275         dns_rbtdb_t *rbtdb;
5276
5277         rbtdb = (dns_rbtdb_t *)db;
5278
5279         REQUIRE(VALID_RBTDB(rbtdb));
5280
5281         return (dns_master_dump2(rbtdb->common.mctx, db, version,
5282                                  &dns_master_style_default,
5283                                  filename, masterformat));
5284 }
5285
5286 static void
5287 delete_callback(void *data, void *arg) {
5288         dns_rbtdb_t *rbtdb = arg;
5289         rdatasetheader_t *current, *next;
5290
5291         for (current = data; current != NULL; current = next) {
5292                 next = current->next;
5293                 free_rdataset(rbtdb->common.mctx, current);
5294         }
5295 }
5296
5297 static isc_boolean_t
5298 issecure(dns_db_t *db) {
5299         dns_rbtdb_t *rbtdb;
5300         isc_boolean_t secure;
5301
5302         rbtdb = (dns_rbtdb_t *)db;
5303
5304         REQUIRE(VALID_RBTDB(rbtdb));
5305
5306         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5307         secure = rbtdb->secure;
5308         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5309
5310         return (secure);
5311 }
5312
5313 static unsigned int
5314 nodecount(dns_db_t *db) {
5315         dns_rbtdb_t *rbtdb;
5316         unsigned int count;
5317
5318         rbtdb = (dns_rbtdb_t *)db;
5319
5320         REQUIRE(VALID_RBTDB(rbtdb));
5321
5322         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5323         count = dns_rbt_nodecount(rbtdb->tree);
5324         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5325
5326         return (count);
5327 }
5328
5329 static void
5330 settask(dns_db_t *db, isc_task_t *task) {
5331         dns_rbtdb_t *rbtdb;
5332
5333         rbtdb = (dns_rbtdb_t *)db;
5334
5335         REQUIRE(VALID_RBTDB(rbtdb));
5336
5337         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5338         if (rbtdb->task != NULL)
5339                 isc_task_detach(&rbtdb->task);
5340         if (task != NULL)
5341                 isc_task_attach(task, &rbtdb->task);
5342         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
5343 }
5344
5345 static isc_boolean_t
5346 ispersistent(dns_db_t *db) {
5347         UNUSED(db);
5348         return (ISC_FALSE);
5349 }
5350
5351 static isc_result_t
5352 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
5353         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5354         dns_rbtnode_t *onode;
5355         isc_result_t result = ISC_R_SUCCESS;
5356
5357         REQUIRE(VALID_RBTDB(rbtdb));
5358         REQUIRE(nodep != NULL && *nodep == NULL);
5359
5360         /* Note that the access to origin_node doesn't require a DB lock */
5361         onode = (dns_rbtnode_t *)rbtdb->origin_node;
5362         if (onode != NULL) {
5363                 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
5364                 new_reference(rbtdb, onode);
5365                 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
5366
5367                 *nodep = rbtdb->origin_node;
5368         } else {
5369                 INSIST(!IS_CACHE(rbtdb));
5370                 result = ISC_R_NOTFOUND;
5371         }
5372
5373         return (result);
5374 }
5375
5376 static dns_dbmethods_t zone_methods = {
5377         attach,
5378         detach,
5379         beginload,
5380         endload,
5381         dump,
5382         currentversion,
5383         newversion,
5384         attachversion,
5385         closeversion,
5386         findnode,
5387         zone_find,
5388         zone_findzonecut,
5389         attachnode,
5390         detachnode,
5391         expirenode,
5392         printnode,
5393         createiterator,
5394         zone_findrdataset,
5395         allrdatasets,
5396         addrdataset,
5397         subtractrdataset,
5398         deleterdataset,
5399         issecure,
5400         nodecount,
5401         ispersistent,
5402         overmem,
5403         settask,
5404         getoriginnode
5405 };
5406
5407 static dns_dbmethods_t cache_methods = {
5408         attach,
5409         detach,
5410         beginload,
5411         endload,
5412         dump,
5413         currentversion,
5414         newversion,
5415         attachversion,
5416         closeversion,
5417         findnode,
5418         cache_find,
5419         cache_findzonecut,
5420         attachnode,
5421         detachnode,
5422         expirenode,
5423         printnode,
5424         createiterator,
5425         cache_findrdataset,
5426         allrdatasets,
5427         addrdataset,
5428         subtractrdataset,
5429         deleterdataset,
5430         issecure,
5431         nodecount,
5432         ispersistent,
5433         overmem,
5434         settask,
5435         getoriginnode
5436 };
5437
5438 isc_result_t
5439 #ifdef DNS_RBTDB_VERSION64
5440 dns_rbtdb64_create
5441 #else
5442 dns_rbtdb_create
5443 #endif
5444                 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
5445                  dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
5446                  void *driverarg, dns_db_t **dbp)
5447 {
5448         dns_rbtdb_t *rbtdb;
5449         isc_result_t result;
5450         int i;
5451         dns_name_t name;
5452
5453         /* Keep the compiler happy. */
5454         UNUSED(argc);
5455         UNUSED(argv);
5456         UNUSED(driverarg);
5457
5458         rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
5459         if (rbtdb == NULL)
5460                 return (ISC_R_NOMEMORY);
5461
5462         memset(rbtdb, '\0', sizeof(*rbtdb));
5463         dns_name_init(&rbtdb->common.origin, NULL);
5464         rbtdb->common.attributes = 0;
5465         if (type == dns_dbtype_cache) {
5466                 rbtdb->common.methods = &cache_methods;
5467                 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
5468         } else if (type == dns_dbtype_stub) {
5469                 rbtdb->common.methods = &zone_methods;
5470                 rbtdb->common.attributes |= DNS_DBATTR_STUB;
5471         } else
5472                 rbtdb->common.methods = &zone_methods;
5473         rbtdb->common.rdclass = rdclass;
5474         rbtdb->common.mctx = NULL;
5475
5476         result = RBTDB_INITLOCK(&rbtdb->lock);
5477         if (result != ISC_R_SUCCESS)
5478                 goto cleanup_rbtdb;
5479
5480         result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
5481         if (result != ISC_R_SUCCESS)
5482                 goto cleanup_lock;
5483
5484         if (rbtdb->node_lock_count == 0) {
5485                 if (IS_CACHE(rbtdb))
5486                         rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
5487                 else
5488                         rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
5489         }
5490         INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
5491         rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
5492                                         sizeof(rbtdb_nodelock_t));
5493         if (rbtdb->node_locks == NULL) {
5494                 result = ISC_R_NOMEMORY;
5495                 goto cleanup_tree_lock;
5496         }
5497
5498         rbtdb->active = rbtdb->node_lock_count;
5499
5500         for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
5501                 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
5502                 if (result == ISC_R_SUCCESS) {
5503                         result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
5504                         if (result != ISC_R_SUCCESS)
5505                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
5506                 }
5507                 if (result != ISC_R_SUCCESS) {
5508                         while (i-- > 0) {
5509                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
5510                                 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
5511                                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
5512                         }
5513                         goto cleanup_node_locks;
5514                 }
5515                 rbtdb->node_locks[i].exiting = ISC_FALSE;
5516         }
5517         
5518         /*
5519          * Attach to the mctx.  The database will persist so long as there
5520          * are references to it, and attaching to the mctx ensures that our
5521          * mctx won't disappear out from under us.
5522          */
5523         isc_mem_attach(mctx, &rbtdb->common.mctx);
5524
5525         /*
5526          * Must be initalized before free_rbtdb() is called.
5527          */
5528         isc_ondestroy_init(&rbtdb->common.ondest);
5529
5530         /*
5531          * Make a copy of the origin name.
5532          */
5533         result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
5534         if (result != ISC_R_SUCCESS) {
5535                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
5536                 return (result);
5537         }
5538
5539         /*
5540          * Make the Red-Black Tree.
5541          */
5542         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
5543         if (result != ISC_R_SUCCESS) {
5544                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
5545                 return (result);
5546         }
5547         /*
5548          * In order to set the node callback bit correctly in zone databases,
5549          * we need to know if the node has the origin name of the zone.
5550          * In loading_addrdataset() we could simply compare the new name
5551          * to the origin name, but this is expensive.  Also, we don't know the
5552          * node name in addrdataset(), so we need another way of knowing the
5553          * zone's top.
5554          *
5555          * We now explicitly create a node for the zone's origin, and then
5556          * we simply remember the node's address.  This is safe, because
5557          * the top-of-zone node can never be deleted, nor can its address
5558          * change.
5559          */
5560         if (!IS_CACHE(rbtdb)) {
5561                 rbtdb->origin_node = NULL;
5562                 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
5563                                          &rbtdb->origin_node);
5564                 if (result != ISC_R_SUCCESS) {
5565                         INSIST(result != ISC_R_EXISTS);
5566                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
5567                         return (result);
5568                 }
5569                 /*
5570                  * We need to give the origin node the right locknum.
5571                  */
5572                 dns_name_init(&name, NULL);
5573                 dns_rbt_namefromnode(rbtdb->origin_node, &name);
5574 #ifdef DNS_RBT_USEHASH
5575                 rbtdb->origin_node->locknum =
5576                         rbtdb->origin_node->hashval %
5577                         rbtdb->node_lock_count;
5578 #else
5579                 rbtdb->origin_node->locknum =
5580                         dns_name_hash(&name, ISC_TRUE) %
5581                         rbtdb->node_lock_count;
5582 #endif
5583         }
5584
5585         /*
5586          * Misc. Initialization.
5587          */
5588         result = isc_refcount_init(&rbtdb->references, 1);
5589         if (result != ISC_R_SUCCESS) {
5590                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
5591                 return (result);
5592         }
5593         rbtdb->attributes = 0;
5594         rbtdb->secure = ISC_FALSE;
5595         rbtdb->overmem = ISC_FALSE;
5596         rbtdb->task = NULL;
5597
5598         /*
5599          * Version Initialization.
5600          */
5601         rbtdb->current_serial = 1;
5602         rbtdb->least_serial = 1;
5603         rbtdb->next_serial = 2;
5604         rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
5605         if (rbtdb->current_version == NULL) {
5606                 isc_refcount_decrement(&rbtdb->references, NULL);
5607                 isc_refcount_destroy(&rbtdb->references);
5608                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
5609                 return (ISC_R_NOMEMORY);
5610         }
5611         rbtdb->future_version = NULL;
5612         ISC_LIST_INIT(rbtdb->open_versions);
5613         /*
5614          * Keep the current version in the open list so that list operation
5615          * won't happen in normal lookup operations.
5616          */
5617         PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
5618
5619         rbtdb->common.magic = DNS_DB_MAGIC;
5620         rbtdb->common.impmagic = RBTDB_MAGIC;
5621
5622         *dbp = (dns_db_t *)rbtdb;
5623
5624         return (ISC_R_SUCCESS);
5625
5626  cleanup_node_locks:
5627         isc_mem_put(mctx, rbtdb->node_locks,
5628                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
5629
5630  cleanup_tree_lock:
5631         isc_rwlock_destroy(&rbtdb->tree_lock);
5632
5633  cleanup_lock:
5634         RBTDB_DESTROYLOCK(&rbtdb->lock);
5635
5636  cleanup_rbtdb:
5637         isc_mem_put(mctx, rbtdb,  sizeof(*rbtdb));
5638         return (result);
5639 }
5640
5641
5642 /*
5643  * Slabbed Rdataset Methods
5644  */
5645
5646 static void
5647 rdataset_disassociate(dns_rdataset_t *rdataset) {
5648         dns_db_t *db = rdataset->private1;
5649         dns_dbnode_t *node = rdataset->private2;
5650
5651         detachnode(db, &node);
5652 }
5653
5654 static isc_result_t
5655 rdataset_first(dns_rdataset_t *rdataset) {
5656         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
5657         unsigned int count;
5658
5659         count = raw[0] * 256 + raw[1];
5660         if (count == 0) {
5661                 rdataset->private5 = NULL;
5662                 return (ISC_R_NOMORE);
5663         }
5664         
5665 #if DNS_RDATASET_FIXED
5666         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
5667                 raw += 2 + (4 * count);
5668         else
5669 #endif
5670                 raw += 2;
5671
5672         /*
5673          * The privateuint4 field is the number of rdata beyond the
5674          * cursor position, so we decrement the total count by one
5675          * before storing it.
5676          *
5677          * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
5678          * first record.  If DNS_RDATASETATTR_LOADORDER is set 'raw' points
5679          * to the first entry in the offset table.
5680          */
5681         count--;
5682         rdataset->privateuint4 = count;
5683         rdataset->private5 = raw;
5684
5685         return (ISC_R_SUCCESS);
5686 }
5687
5688 static isc_result_t
5689 rdataset_next(dns_rdataset_t *rdataset) {
5690         unsigned int count;
5691         unsigned int length;
5692         unsigned char *raw;     /* RDATASLAB */
5693
5694         count = rdataset->privateuint4;
5695         if (count == 0)
5696                 return (ISC_R_NOMORE);
5697         count--;
5698         rdataset->privateuint4 = count;
5699
5700         /*
5701          * Skip forward one record (length + 4) or one offset (4).
5702          */
5703         raw = rdataset->private5;
5704 #if DNS_RDATASET_FIXED
5705         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
5706 #endif
5707                 length = raw[0] * 256 + raw[1];
5708                 raw += length;
5709 #if DNS_RDATASET_FIXED
5710         }
5711         rdataset->private5 = raw + 4;           /* length(2) + order(2) */
5712 #else
5713         rdataset->private5 = raw + 2;           /* length(2) */
5714 #endif
5715
5716         return (ISC_R_SUCCESS);
5717 }
5718
5719 static void
5720 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
5721         unsigned char *raw = rdataset->private5;        /* RDATASLAB */
5722 #if DNS_RDATASET_FIXED
5723         unsigned int offset;
5724 #endif
5725         isc_region_t r;
5726
5727         REQUIRE(raw != NULL);
5728
5729         /*
5730          * Find the start of the record if not already in private5
5731          * then skip the length and order fields.
5732          */
5733 #if DNS_RDATASET_FIXED
5734         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
5735                 offset = (raw[0] << 24) + (raw[1] << 16) +
5736                          (raw[2] << 8) + raw[3];
5737                 raw = rdataset->private3;
5738                 raw += offset;
5739         }
5740 #endif
5741         r.length = raw[0] * 256 + raw[1];
5742
5743 #if DNS_RDATASET_FIXED
5744         raw += 4;
5745 #else
5746         raw += 2;
5747 #endif
5748         r.base = raw;
5749         dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
5750 }
5751
5752 static void
5753 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
5754         dns_db_t *db = source->private1;
5755         dns_dbnode_t *node = source->private2;
5756         dns_dbnode_t *cloned_node = NULL;
5757
5758         attachnode(db, node, &cloned_node);
5759         *target = *source;
5760
5761         /*
5762          * Reset iterator state.
5763          */
5764         target->privateuint4 = 0;
5765         target->private5 = NULL;
5766 }
5767
5768 static unsigned int
5769 rdataset_count(dns_rdataset_t *rdataset) {
5770         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
5771         unsigned int count;
5772
5773         count = raw[0] * 256 + raw[1];
5774
5775         return (count);
5776 }
5777
5778 static isc_result_t
5779 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
5780                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
5781 {
5782         dns_db_t *db = rdataset->private1;
5783         dns_dbnode_t *node = rdataset->private2;
5784         dns_dbnode_t *cloned_node;
5785         struct noqname *noqname = rdataset->private6;
5786
5787         cloned_node = NULL;
5788         attachnode(db, node, &cloned_node);
5789         nsec->methods = &rdataset_methods;
5790         nsec->rdclass = db->rdclass;
5791         nsec->type = dns_rdatatype_nsec;
5792         nsec->covers = 0;
5793         nsec->ttl = rdataset->ttl;
5794         nsec->trust = rdataset->trust;
5795         nsec->private1 = rdataset->private1;
5796         nsec->private2 = rdataset->private2;
5797         nsec->private3 = noqname->nsec;
5798         nsec->privateuint4 = 0;
5799         nsec->private5 = NULL;
5800         nsec->private6 = NULL;
5801
5802         cloned_node = NULL;
5803         attachnode(db, node, &cloned_node);
5804         nsecsig->methods = &rdataset_methods;
5805         nsecsig->rdclass = db->rdclass;
5806         nsecsig->type = dns_rdatatype_rrsig;
5807         nsecsig->covers = dns_rdatatype_nsec;
5808         nsecsig->ttl = rdataset->ttl;
5809         nsecsig->trust = rdataset->trust;
5810         nsecsig->private1 = rdataset->private1;
5811         nsecsig->private2 = rdataset->private2;
5812         nsecsig->private3 = noqname->nsecsig;
5813         nsecsig->privateuint4 = 0;
5814         nsecsig->private5 = NULL;
5815         nsec->private6 = NULL;
5816
5817         dns_name_clone(&noqname->name, name);
5818
5819         return (ISC_R_SUCCESS);
5820 }
5821
5822 /*
5823  * Rdataset Iterator Methods
5824  */
5825
5826 static void
5827 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
5828         rbtdb_rdatasetiter_t *rbtiterator;
5829
5830         rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
5831
5832         if (rbtiterator->common.version != NULL)
5833                 closeversion(rbtiterator->common.db,
5834                              &rbtiterator->common.version, ISC_FALSE);
5835         detachnode(rbtiterator->common.db, &rbtiterator->common.node);
5836         isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
5837                     sizeof(*rbtiterator));
5838
5839         *iteratorp = NULL;
5840 }
5841
5842 static isc_result_t
5843 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
5844         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
5845         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
5846         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
5847         rbtdb_version_t *rbtversion = rbtiterator->common.version;
5848         rdatasetheader_t *header, *top_next;
5849         rbtdb_serial_t serial;
5850         isc_stdtime_t now;
5851
5852         if (IS_CACHE(rbtdb)) {
5853                 serial = 1;
5854                 now = rbtiterator->common.now;
5855         } else {
5856                 serial = rbtversion->serial;
5857                 now = 0;
5858         }
5859
5860         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5861                   isc_rwlocktype_read);
5862
5863         for (header = rbtnode->data; header != NULL; header = top_next) {
5864                 top_next = header->next;
5865                 do {
5866                         if (header->serial <= serial && !IGNORE(header)) {
5867                                 /*
5868                                  * Is this a "this rdataset doesn't exist"
5869                                  * record?  Or is it too old in the cache?
5870                                  *
5871                                  * Note: unlike everywhere else, we
5872                                  * check for now > header->ttl instead
5873                                  * of now >= header->ttl.  This allows
5874                                  * ANY and RRSIG queries for 0 TTL
5875                                  * rdatasets to work.
5876                                  */
5877                                 if (NONEXISTENT(header) ||
5878                                     (now != 0 && now > header->ttl))
5879                                         header = NULL;
5880                                 break;
5881                         } else
5882                                 header = header->down;
5883                 } while (header != NULL);
5884                 if (header != NULL)
5885                         break;
5886         }
5887
5888         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5889                     isc_rwlocktype_read);
5890
5891         rbtiterator->current = header;
5892
5893         if (header == NULL)
5894                 return (ISC_R_NOMORE);
5895
5896         return (ISC_R_SUCCESS);
5897 }
5898
5899 static isc_result_t
5900 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
5901         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
5902         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
5903         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
5904         rbtdb_version_t *rbtversion = rbtiterator->common.version;
5905         rdatasetheader_t *header, *top_next;
5906         rbtdb_serial_t serial;
5907         isc_stdtime_t now;
5908         rbtdb_rdatatype_t type, negtype;
5909         dns_rdatatype_t rdtype, covers;
5910
5911         header = rbtiterator->current;
5912         if (header == NULL)
5913                 return (ISC_R_NOMORE);
5914
5915         if (IS_CACHE(rbtdb)) {
5916                 serial = 1;
5917                 now = rbtiterator->common.now;
5918         } else {
5919                 serial = rbtversion->serial;
5920                 now = 0;
5921         }
5922
5923         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5924                   isc_rwlocktype_read);
5925
5926         type = header->type;
5927         rdtype = RBTDB_RDATATYPE_BASE(header->type);
5928         if (rdtype == 0) {
5929                 covers = RBTDB_RDATATYPE_EXT(header->type);
5930                 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5931         } else 
5932                 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5933         for (header = header->next; header != NULL; header = top_next) {
5934                 top_next = header->next;
5935                 /*
5936                  * If not walking back up the down list.
5937                  */
5938                 if (header->type != type && header->type != negtype) {
5939                         do {
5940                                 if (header->serial <= serial &&
5941                                     !IGNORE(header)) {
5942                                         /*
5943                                          * Is this a "this rdataset doesn't
5944                                          * exist" record?
5945                                          *
5946                                          * Note: unlike everywhere else, we
5947                                          * check for now > header->ttl instead
5948                                          * of now >= header->ttl.  This allows
5949                                          * ANY and RRSIG queries for 0 TTL
5950                                          * rdatasets to work.
5951                                          */
5952                                         if ((header->attributes &
5953                                              RDATASET_ATTR_NONEXISTENT) != 0 ||
5954                                             (now != 0 && now > header->ttl))
5955                                                 header = NULL;
5956                                         break;
5957                                 } else
5958                                         header = header->down;
5959                         } while (header != NULL);
5960                         if (header != NULL)
5961                                 break;
5962                 }
5963         }
5964
5965         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5966                     isc_rwlocktype_read);
5967
5968         rbtiterator->current = header;
5969
5970         if (header == NULL)
5971                 return (ISC_R_NOMORE);
5972
5973         return (ISC_R_SUCCESS);
5974 }
5975
5976 static void
5977 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
5978         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
5979         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
5980         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
5981         rdatasetheader_t *header;
5982
5983         header = rbtiterator->current;
5984         REQUIRE(header != NULL);
5985
5986         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5987                   isc_rwlocktype_read);
5988
5989         bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
5990                       rdataset);
5991
5992         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5993                     isc_rwlocktype_read);
5994 }
5995
5996
5997 /*
5998  * Database Iterator Methods
5999  */
6000
6001 static inline void
6002 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
6003         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
6004         dns_rbtnode_t *node = rbtdbiter->node;
6005
6006         if (node == NULL)
6007                 return;
6008
6009         INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
6010         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
6011         new_reference(rbtdb, node);
6012         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
6013 }
6014
6015 static inline void
6016 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
6017         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
6018         dns_rbtnode_t *node = rbtdbiter->node;
6019         nodelock_t *lock;
6020
6021         if (node == NULL)
6022                 return;
6023
6024         lock = &rbtdb->node_locks[node->locknum].lock;
6025         NODE_LOCK(lock, isc_rwlocktype_read);
6026         decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
6027                             rbtdbiter->tree_locked);
6028         NODE_UNLOCK(lock, isc_rwlocktype_read);
6029
6030         rbtdbiter->node = NULL;
6031 }
6032
6033 static void
6034 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
6035         dns_rbtnode_t *node;
6036         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
6037         isc_boolean_t was_read_locked = ISC_FALSE;
6038         nodelock_t *lock;
6039         int i;
6040
6041         if (rbtdbiter->delete != 0) {
6042                 /*
6043                  * Note that "%d node of %d in tree" can report things like
6044                  * "flush_deletions: 59 nodes of 41 in tree".  This means
6045                  * That some nodes appear on the deletions list more than
6046                  * once.  Only the last occurence will actually be deleted.
6047                  */
6048                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
6049                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
6050                               "flush_deletions: %d nodes of %d in tree",
6051                               rbtdbiter->delete,
6052                               dns_rbt_nodecount(rbtdb->tree));
6053
6054                 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
6055                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6056                         was_read_locked = ISC_TRUE;
6057                 }
6058                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6059                 rbtdbiter->tree_locked = isc_rwlocktype_write;
6060
6061                 for (i = 0; i < rbtdbiter->delete; i++) {
6062                         node = rbtdbiter->deletions[i];
6063                         lock = &rbtdb->node_locks[node->locknum].lock;
6064
6065                         NODE_LOCK(lock, isc_rwlocktype_read);
6066                         decrement_reference(rbtdb, node, 0,
6067                                             isc_rwlocktype_read,
6068                                             rbtdbiter->tree_locked);
6069                         NODE_UNLOCK(lock, isc_rwlocktype_read);
6070                 }
6071
6072                 rbtdbiter->delete = 0;
6073
6074                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6075                 if (was_read_locked) {
6076                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6077                         rbtdbiter->tree_locked = isc_rwlocktype_read;
6078
6079                 } else {
6080                         rbtdbiter->tree_locked = isc_rwlocktype_none;
6081                 }
6082         }
6083 }
6084
6085 static inline void
6086 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
6087         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
6088
6089         REQUIRE(rbtdbiter->paused);
6090         REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
6091
6092         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6093         rbtdbiter->tree_locked = isc_rwlocktype_read;
6094
6095         rbtdbiter->paused = ISC_FALSE;
6096 }
6097
6098 static void
6099 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
6100         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
6101         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
6102         dns_db_t *db = NULL;
6103
6104         if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
6105                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6106                 rbtdbiter->tree_locked = isc_rwlocktype_none;
6107         } else
6108                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
6109
6110         dereference_iter_node(rbtdbiter);
6111
6112         flush_deletions(rbtdbiter);
6113
6114         dns_db_attach(rbtdbiter->common.db, &db);
6115         dns_db_detach(&rbtdbiter->common.db);
6116
6117         dns_rbtnodechain_reset(&rbtdbiter->chain);
6118         isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
6119         dns_db_detach(&db);
6120
6121         *iteratorp = NULL;
6122 }
6123
6124 static isc_result_t
6125 dbiterator_first(dns_dbiterator_t *iterator) {
6126         isc_result_t result;
6127         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6128         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
6129         dns_name_t *name, *origin;
6130
6131         if (rbtdbiter->result != ISC_R_SUCCESS &&
6132             rbtdbiter->result != ISC_R_NOMORE)
6133                 return (rbtdbiter->result);
6134
6135         if (rbtdbiter->paused)
6136                 resume_iteration(rbtdbiter);
6137
6138         dereference_iter_node(rbtdbiter);
6139
6140         name = dns_fixedname_name(&rbtdbiter->name);
6141         origin = dns_fixedname_name(&rbtdbiter->origin);
6142         dns_rbtnodechain_reset(&rbtdbiter->chain);
6143
6144         result = dns_rbtnodechain_first(&rbtdbiter->chain, rbtdb->tree, name,
6145                                         origin);
6146
6147         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
6148                 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
6149                                                   NULL, &rbtdbiter->node);
6150                 if (result == ISC_R_SUCCESS) {
6151                         rbtdbiter->new_origin = ISC_TRUE;
6152                         reference_iter_node(rbtdbiter);
6153                 }
6154         } else {
6155                 INSIST(result == ISC_R_NOTFOUND);
6156                 result = ISC_R_NOMORE; /* The tree is empty. */
6157         }
6158
6159         rbtdbiter->result = result;
6160
6161         return (result);
6162 }
6163
6164 static isc_result_t
6165 dbiterator_last(dns_dbiterator_t *iterator) {
6166         isc_result_t result;
6167         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6168         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
6169         dns_name_t *name, *origin;
6170
6171         if (rbtdbiter->result != ISC_R_SUCCESS &&
6172             rbtdbiter->result != ISC_R_NOMORE)
6173                 return (rbtdbiter->result);
6174
6175         if (rbtdbiter->paused)
6176                 resume_iteration(rbtdbiter);
6177
6178         dereference_iter_node(rbtdbiter);
6179
6180         name = dns_fixedname_name(&rbtdbiter->name);
6181         origin = dns_fixedname_name(&rbtdbiter->origin);
6182         dns_rbtnodechain_reset(&rbtdbiter->chain);
6183
6184         result = dns_rbtnodechain_last(&rbtdbiter->chain, rbtdb->tree, name,
6185                                        origin);
6186         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
6187                 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
6188                                                   NULL, &rbtdbiter->node);
6189                 if (result == ISC_R_SUCCESS) {
6190                         rbtdbiter->new_origin = ISC_TRUE;
6191                         reference_iter_node(rbtdbiter);
6192                 }
6193         } else {
6194                 INSIST(result == ISC_R_NOTFOUND);
6195                 result = ISC_R_NOMORE; /* The tree is empty. */
6196         }
6197
6198         rbtdbiter->result = result;
6199
6200         return (result);
6201 }
6202
6203 static isc_result_t
6204 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
6205         isc_result_t result;
6206         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6207         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
6208         dns_name_t *iname, *origin;
6209
6210         if (rbtdbiter->result != ISC_R_SUCCESS &&
6211             rbtdbiter->result != ISC_R_NOMORE)
6212                 return (rbtdbiter->result);
6213
6214         if (rbtdbiter->paused)
6215                 resume_iteration(rbtdbiter);
6216
6217         dereference_iter_node(rbtdbiter);
6218
6219         iname = dns_fixedname_name(&rbtdbiter->name);
6220         origin = dns_fixedname_name(&rbtdbiter->origin);
6221         dns_rbtnodechain_reset(&rbtdbiter->chain);
6222
6223         result = dns_rbt_findnode(rbtdb->tree, name, NULL, &rbtdbiter->node,
6224                                   &rbtdbiter->chain, DNS_RBTFIND_EMPTYDATA,
6225                                   NULL, NULL);
6226         if (result == ISC_R_SUCCESS) {
6227                 result = dns_rbtnodechain_current(&rbtdbiter->chain, iname,
6228                                                   origin, NULL);
6229                 if (result == ISC_R_SUCCESS) {
6230                         rbtdbiter->new_origin = ISC_TRUE;
6231                         reference_iter_node(rbtdbiter);
6232                 }
6233
6234         } else if (result == DNS_R_PARTIALMATCH)
6235                 result = ISC_R_NOTFOUND;
6236
6237         rbtdbiter->result = result;
6238
6239         return (result);
6240 }
6241
6242 static isc_result_t
6243 dbiterator_prev(dns_dbiterator_t *iterator) {
6244         isc_result_t result;
6245         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6246         dns_name_t *name, *origin;
6247
6248         REQUIRE(rbtdbiter->node != NULL);
6249
6250         if (rbtdbiter->result != ISC_R_SUCCESS)
6251                 return (rbtdbiter->result);
6252
6253         if (rbtdbiter->paused)
6254                 resume_iteration(rbtdbiter);
6255
6256         name = dns_fixedname_name(&rbtdbiter->name);
6257         origin = dns_fixedname_name(&rbtdbiter->origin);
6258         result = dns_rbtnodechain_prev(&rbtdbiter->chain, name, origin);
6259
6260         dereference_iter_node(rbtdbiter);
6261
6262         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
6263                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
6264                 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
6265                                                   NULL, &rbtdbiter->node);
6266         }
6267
6268         if (result == ISC_R_SUCCESS)
6269                 reference_iter_node(rbtdbiter);
6270
6271         rbtdbiter->result = result;
6272
6273         return (result);
6274 }
6275
6276 static isc_result_t
6277 dbiterator_next(dns_dbiterator_t *iterator) {
6278         isc_result_t result;
6279         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6280         dns_name_t *name, *origin;
6281
6282         REQUIRE(rbtdbiter->node != NULL);
6283
6284         if (rbtdbiter->result != ISC_R_SUCCESS)
6285                 return (rbtdbiter->result);
6286
6287         if (rbtdbiter->paused)
6288                 resume_iteration(rbtdbiter);
6289
6290         name = dns_fixedname_name(&rbtdbiter->name);
6291         origin = dns_fixedname_name(&rbtdbiter->origin);
6292         result = dns_rbtnodechain_next(&rbtdbiter->chain, name, origin);
6293
6294         dereference_iter_node(rbtdbiter);
6295
6296         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
6297                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
6298                 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
6299                                                   NULL, &rbtdbiter->node);
6300         }
6301         if (result == ISC_R_SUCCESS)
6302                 reference_iter_node(rbtdbiter);
6303
6304         rbtdbiter->result = result;
6305
6306         return (result);
6307 }
6308
6309 static isc_result_t
6310 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
6311                    dns_name_t *name)
6312 {
6313         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
6314         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6315         dns_rbtnode_t *node = rbtdbiter->node;
6316         isc_result_t result;
6317         dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
6318         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
6319
6320         REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
6321         REQUIRE(rbtdbiter->node != NULL);
6322
6323         if (rbtdbiter->paused)
6324                 resume_iteration(rbtdbiter);
6325
6326         if (name != NULL) {
6327                 if (rbtdbiter->common.relative_names)
6328                         origin = NULL;
6329                 result = dns_name_concatenate(nodename, origin, name, NULL);
6330                 if (result != ISC_R_SUCCESS)
6331                         return (result);
6332                 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
6333                         result = DNS_R_NEWORIGIN;
6334         } else
6335                 result = ISC_R_SUCCESS;
6336
6337         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
6338         new_reference(rbtdb, node);
6339         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
6340
6341         *nodep = rbtdbiter->node;
6342
6343         if (iterator->cleaning && result == ISC_R_SUCCESS) {
6344                 isc_result_t expire_result;
6345
6346                 /*
6347                  * If the deletion array is full, flush it before trying
6348                  * to expire the current node.  The current node can't
6349                  * fully deleted while the iteration cursor is still on it.
6350                  */
6351                 if (rbtdbiter->delete == DELETION_BATCH_MAX)
6352                         flush_deletions(rbtdbiter);
6353
6354                 expire_result = expirenode(iterator->db, *nodep, 0);
6355
6356                 /*
6357                  * expirenode() currently always returns success.
6358                  */
6359                 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
6360                         unsigned int refs;
6361
6362                         rbtdbiter->deletions[rbtdbiter->delete++] = node;
6363                         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
6364                         dns_rbtnode_refincrement(node, &refs);
6365                         INSIST(refs != 0);
6366                         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
6367                 }
6368         }
6369
6370         return (result);
6371 }
6372
6373 static isc_result_t
6374 dbiterator_pause(dns_dbiterator_t *iterator) {
6375         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
6376         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6377
6378         if (rbtdbiter->result != ISC_R_SUCCESS &&
6379             rbtdbiter->result != ISC_R_NOMORE)
6380                 return (rbtdbiter->result);
6381
6382         if (rbtdbiter->paused)
6383                 return (ISC_R_SUCCESS);
6384
6385         rbtdbiter->paused = ISC_TRUE;
6386
6387         if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
6388                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
6389                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6390                 rbtdbiter->tree_locked = isc_rwlocktype_none;
6391         }
6392
6393         flush_deletions(rbtdbiter);
6394
6395         return (ISC_R_SUCCESS);
6396 }
6397
6398 static isc_result_t
6399 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
6400         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6401         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
6402
6403         if (rbtdbiter->result != ISC_R_SUCCESS)
6404                 return (rbtdbiter->result);
6405
6406         return (dns_name_copy(origin, name, NULL));
6407 }
6408
6409 /*%
6410  * Additional cache routines.
6411  */
6412 static isc_result_t
6413 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
6414                        dns_rdatatype_t qtype, dns_acache_t *acache,
6415                        dns_zone_t **zonep, dns_db_t **dbp,
6416                        dns_dbversion_t **versionp, dns_dbnode_t **nodep,
6417                        dns_name_t *fname, dns_message_t *msg,
6418                        isc_stdtime_t now)
6419 {
6420         dns_rbtdb_t *rbtdb = rdataset->private1;
6421         dns_rbtnode_t *rbtnode = rdataset->private2;
6422         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
6423         unsigned int current_count = rdataset->privateuint4;
6424         unsigned int count;
6425         rdatasetheader_t *header;
6426         nodelock_t *nodelock;
6427         unsigned int total_count;
6428         acachectl_t *acarray;
6429         dns_acacheentry_t *entry;
6430         isc_result_t result;
6431
6432         UNUSED(qtype); /* we do not use this value at least for now */
6433         UNUSED(acache);
6434
6435         header = (struct rdatasetheader *)(raw - sizeof(*header));
6436
6437         total_count = raw[0] * 256 + raw[1];
6438         INSIST(total_count > current_count);
6439         count = total_count - current_count - 1;
6440
6441         acarray = NULL;
6442
6443         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
6444         NODE_LOCK(nodelock, isc_rwlocktype_read);
6445
6446         switch (type) {
6447         case dns_rdatasetadditional_fromauth:
6448                 acarray = header->additional_auth;
6449                 break;
6450         case dns_rdatasetadditional_fromcache:
6451                 acarray = NULL;
6452                 break;
6453         case dns_rdatasetadditional_fromglue:
6454                 acarray = header->additional_glue;
6455                 break;
6456         default:
6457                 INSIST(0);
6458         }
6459
6460         if (acarray == NULL) {
6461                 if (type != dns_rdatasetadditional_fromcache)
6462                         dns_acache_countquerymiss(acache);
6463                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
6464                 return (ISC_R_NOTFOUND);
6465         }
6466
6467         if (acarray[count].entry == NULL) {
6468                 dns_acache_countquerymiss(acache);
6469                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
6470                 return (ISC_R_NOTFOUND);
6471         }
6472
6473         entry = NULL;
6474         dns_acache_attachentry(acarray[count].entry, &entry);
6475
6476         NODE_UNLOCK(nodelock, isc_rwlocktype_read);
6477
6478         result = dns_acache_getentry(entry, zonep, dbp, versionp,
6479                                      nodep, fname, msg, now);
6480
6481         dns_acache_detachentry(&entry);
6482
6483         return (result);
6484 }
6485
6486 static void
6487 acache_callback(dns_acacheentry_t *entry, void **arg) {
6488         dns_rbtdb_t *rbtdb;
6489         dns_rbtnode_t *rbtnode;
6490         nodelock_t *nodelock;
6491         acachectl_t *acarray = NULL;
6492         acache_cbarg_t *cbarg;
6493         unsigned int count;
6494
6495         REQUIRE(arg != NULL);
6496         cbarg = *arg;
6497
6498         /*
6499          * The caller must hold the entry lock.
6500          */
6501
6502         rbtdb = (dns_rbtdb_t *)cbarg->db;
6503         rbtnode = (dns_rbtnode_t *)cbarg->node;
6504
6505         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
6506         NODE_LOCK(nodelock, isc_rwlocktype_write);
6507
6508         switch (cbarg->type) {
6509         case dns_rdatasetadditional_fromauth:
6510                 acarray = cbarg->header->additional_auth;
6511                 break;
6512         case dns_rdatasetadditional_fromglue:
6513                 acarray = cbarg->header->additional_glue;
6514                 break;
6515         default:
6516                 INSIST(0);
6517         }
6518
6519         count = cbarg->count;
6520         if (acarray[count].entry == entry)
6521                 acarray[count].entry = NULL;
6522         INSIST(acarray[count].cbarg != NULL);
6523         isc_mem_put(rbtdb->common.mctx, acarray[count].cbarg,
6524                     sizeof(acache_cbarg_t));
6525         acarray[count].cbarg = NULL;
6526
6527         dns_acache_detachentry(&entry);
6528
6529         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
6530
6531         dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
6532         dns_db_detach((dns_db_t **)(void*)&rbtdb);
6533
6534         *arg = NULL;
6535 }
6536
6537 static void
6538 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
6539                       acache_cbarg_t **cbargp)
6540 {
6541         acache_cbarg_t *cbarg;
6542
6543         REQUIRE(mctx != NULL);
6544         REQUIRE(entry != NULL);
6545         REQUIRE(cbargp != NULL && *cbargp != NULL);
6546
6547         cbarg = *cbargp;
6548
6549         dns_acache_cancelentry(entry);
6550         dns_db_detachnode(cbarg->db, &cbarg->node);
6551         dns_db_detach(&cbarg->db);
6552
6553         isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
6554
6555         *cbargp = NULL;
6556 }
6557
6558 static isc_result_t
6559 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
6560                        dns_rdatatype_t qtype, dns_acache_t *acache,
6561                        dns_zone_t *zone, dns_db_t *db,
6562                        dns_dbversion_t *version, dns_dbnode_t *node,
6563                        dns_name_t *fname)
6564 {
6565         dns_rbtdb_t *rbtdb = rdataset->private1;
6566         dns_rbtnode_t *rbtnode = rdataset->private2;
6567         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
6568         unsigned int current_count = rdataset->privateuint4;
6569         rdatasetheader_t *header;
6570         unsigned int total_count, count;
6571         nodelock_t *nodelock;
6572         isc_result_t result;
6573         acachectl_t *acarray;
6574         dns_acacheentry_t *newentry, *oldentry = NULL;
6575         acache_cbarg_t *newcbarg, *oldcbarg = NULL;
6576
6577         UNUSED(qtype);
6578
6579         if (type == dns_rdatasetadditional_fromcache)
6580                 return (ISC_R_SUCCESS);
6581
6582         header = (struct rdatasetheader *)(raw - sizeof(*header));
6583
6584         total_count = raw[0] * 256 + raw[1];
6585         INSIST(total_count > current_count);
6586         count = total_count - current_count - 1; /* should be private data */
6587
6588         newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
6589         if (newcbarg == NULL)
6590                 return (ISC_R_NOMEMORY);
6591         newcbarg->type = type;
6592         newcbarg->count = count;
6593         newcbarg->header = header;
6594         newcbarg->db = NULL;
6595         dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
6596         newcbarg->node = NULL;
6597         dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
6598                           &newcbarg->node);
6599         newentry = NULL;
6600         result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
6601                                         acache_callback, newcbarg, &newentry);
6602         if (result != ISC_R_SUCCESS)
6603                 goto fail;
6604         /* Set cache data in the new entry. */
6605         result = dns_acache_setentry(acache, newentry, zone, db,
6606                                      version, node, fname);
6607         if (result != ISC_R_SUCCESS)
6608                 goto fail;
6609
6610         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
6611         NODE_LOCK(nodelock, isc_rwlocktype_write);
6612
6613         acarray = NULL;
6614         switch (type) {
6615         case dns_rdatasetadditional_fromauth:
6616                 acarray = header->additional_auth;
6617                 break;
6618         case dns_rdatasetadditional_fromglue:
6619                 acarray = header->additional_glue;
6620                 break;
6621         default:
6622                 INSIST(0);
6623         }
6624
6625         if (acarray == NULL) {
6626                 unsigned int i;
6627
6628                 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
6629                                       sizeof(acachectl_t));
6630
6631                 if (acarray == NULL) {
6632                         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
6633                         goto fail;
6634                 }
6635
6636                 for (i = 0; i < total_count; i++) {
6637                         acarray[i].entry = NULL;
6638                         acarray[i].cbarg = NULL;
6639                 }
6640         }
6641         switch (type) {
6642         case dns_rdatasetadditional_fromauth:
6643                 header->additional_auth = acarray;
6644                 break;
6645         case dns_rdatasetadditional_fromglue:
6646                 header->additional_glue = acarray;
6647                 break;
6648         default:
6649                 INSIST(0);
6650         }
6651
6652         if (acarray[count].entry != NULL) {
6653                 /*
6654                  * Swap the entry.  Delay cleaning-up the old entry since
6655                  * it would require a node lock.
6656                  */
6657                 oldentry = acarray[count].entry;
6658                 INSIST(acarray[count].cbarg != NULL);
6659                 oldcbarg = acarray[count].cbarg;
6660         }
6661         acarray[count].entry = newentry;
6662         acarray[count].cbarg = newcbarg;
6663
6664         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
6665
6666         if (oldentry != NULL) {
6667                 if (oldcbarg != NULL)
6668                         acache_cancelentry(rbtdb->common.mctx, oldentry,
6669                                            &oldcbarg); 
6670                 dns_acache_detachentry(&oldentry);
6671         }
6672
6673         return (ISC_R_SUCCESS);
6674
6675   fail:
6676         if (newcbarg != NULL) {
6677                 if (newentry != NULL) {
6678                         acache_cancelentry(rbtdb->common.mctx, newentry,
6679                                            &newcbarg);
6680                         dns_acache_detachentry(&newentry);
6681                 } else {
6682                         dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
6683                         dns_db_detach(&newcbarg->db);
6684                         isc_mem_put(rbtdb->common.mctx, newcbarg,
6685                             sizeof(*newcbarg));
6686                 }
6687         }
6688
6689         return (result);
6690 }
6691
6692 static isc_result_t
6693 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
6694                        dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
6695
6696         dns_rbtdb_t *rbtdb = rdataset->private1;
6697         dns_rbtnode_t *rbtnode = rdataset->private2;
6698         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
6699         unsigned int current_count = rdataset->privateuint4;
6700         rdatasetheader_t *header;
6701         nodelock_t *nodelock;
6702         unsigned int total_count, count;
6703         acachectl_t *acarray;
6704         dns_acacheentry_t *entry;
6705         acache_cbarg_t *cbarg;
6706
6707         UNUSED(qtype);          /* we do not use this value at least for now */
6708         UNUSED(acache);
6709
6710         if (type == dns_rdatasetadditional_fromcache)
6711                 return (ISC_R_SUCCESS);
6712
6713         header = (struct rdatasetheader *)(raw - sizeof(*header));
6714
6715         total_count = raw[0] * 256 + raw[1];
6716         INSIST(total_count > current_count);
6717         count = total_count - current_count - 1;
6718
6719         acarray = NULL;
6720         entry = NULL;
6721
6722         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
6723         NODE_LOCK(nodelock, isc_rwlocktype_write);
6724
6725         switch (type) {
6726         case dns_rdatasetadditional_fromauth:
6727                 acarray = header->additional_auth;
6728                 break;
6729         case dns_rdatasetadditional_fromglue:
6730                 acarray = header->additional_glue;
6731                 break;
6732         default:
6733                 INSIST(0);
6734         }
6735
6736         if (acarray == NULL) {
6737                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
6738                 return (ISC_R_NOTFOUND);
6739         }
6740
6741         entry = acarray[count].entry;
6742         if (entry == NULL) {
6743                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
6744                 return (ISC_R_NOTFOUND);
6745         }
6746
6747         acarray[count].entry = NULL;
6748         cbarg = acarray[count].cbarg;
6749         acarray[count].cbarg = NULL;
6750
6751         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
6752
6753         if (entry != NULL) {
6754                 if (cbarg != NULL)
6755                         acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
6756                 dns_acache_detachentry(&entry);
6757         }
6758
6759         return (ISC_R_SUCCESS);
6760 }