]> CyberLeo.Net >> Repos - FreeBSD/releng/7.2.git/blob - contrib/bind9/lib/dns/rbtdb.c
Create releng/7.2 from stable/7 in preparation for 7.2-RELEASE.
[FreeBSD/releng/7.2.git] / contrib / bind9 / lib / dns / rbtdb.c
1 /*
2  * Copyright (C) 2004-2008  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id: rbtdb.c,v 1.196.18.53 2008/01/31 23:46:05 tbox Exp $ */
19
20 /*! \file */
21
22 /*
23  * Principal Author: Bob Halley
24  */
25
26 #include <config.h>
27
28 #include <isc/event.h>
29 #include <isc/mem.h>
30 #include <isc/print.h>
31 #include <isc/mutex.h>
32 #include <isc/random.h>
33 #include <isc/refcount.h>
34 #include <isc/rwlock.h>
35 #include <isc/string.h>
36 #include <isc/task.h>
37 #include <isc/time.h>
38 #include <isc/util.h>
39
40 #include <dns/acache.h>
41 #include <dns/db.h>
42 #include <dns/dbiterator.h>
43 #include <dns/events.h>
44 #include <dns/fixedname.h>
45 #include <dns/lib.h>
46 #include <dns/log.h>
47 #include <dns/masterdump.h>
48 #include <dns/rbt.h>
49 #include <dns/rdata.h>
50 #include <dns/rdataset.h>
51 #include <dns/rdatasetiter.h>
52 #include <dns/rdataslab.h>
53 #include <dns/result.h>
54 #include <dns/view.h>
55 #include <dns/zone.h>
56 #include <dns/zonekey.h>
57
58 #ifdef DNS_RBTDB_VERSION64
59 #include "rbtdb64.h"
60 #else
61 #include "rbtdb.h"
62 #endif
63
64 #ifdef DNS_RBTDB_VERSION64
65 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '8')
66 #else
67 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '4')
68 #endif
69
70 /*%
71  * Note that "impmagic" is not the first four bytes of the struct, so
72  * ISC_MAGIC_VALID cannot be used.
73  */
74 #define VALID_RBTDB(rbtdb)      ((rbtdb) != NULL && \
75                                  (rbtdb)->common.impmagic == RBTDB_MAGIC)
76
77 #ifdef DNS_RBTDB_VERSION64
78 typedef isc_uint64_t                    rbtdb_serial_t;
79 /*%
80  * Make casting easier in symbolic debuggers by using different names
81  * for the 64 bit version.
82  */
83 #define dns_rbtdb_t dns_rbtdb64_t
84 #define rdatasetheader_t rdatasetheader64_t
85 #define rbtdb_version_t rbtdb_version64_t
86 #else
87 typedef isc_uint32_t                    rbtdb_serial_t;
88 #endif
89
90 typedef isc_uint32_t                    rbtdb_rdatatype_t;
91
92 #define RBTDB_RDATATYPE_BASE(type)      ((dns_rdatatype_t)((type) & 0xFFFF))
93 #define RBTDB_RDATATYPE_EXT(type)       ((dns_rdatatype_t)((type) >> 16))
/*
 * Pack a base type 'b' (low 16 bits, see RBTDB_RDATATYPE_BASE) and an
 * extended type 'e' (high 16 bits, see RBTDB_RDATATYPE_EXT) into a single
 * rbtdb_rdatatype_t.
 *
 * The operands are widened to rbtdb_rdatatype_t before shifting: a 16 bit
 * type value is otherwise promoted to (signed) int, and shifting a value
 * >= 0x8000 left by 16 would move a bit into the sign position, which is
 * undefined behaviour.
 */
#define RBTDB_RDATATYPE_VALUE(b, e) \
        ((rbtdb_rdatatype_t)(((rbtdb_rdatatype_t)(e) << 16) | \
                             (rbtdb_rdatatype_t)(b)))
95
96 #define RBTDB_RDATATYPE_SIGNSEC \
97                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
98 #define RBTDB_RDATATYPE_SIGNS \
99                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
100 #define RBTDB_RDATATYPE_SIGCNAME \
101                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
102 #define RBTDB_RDATATYPE_SIGDNAME \
103                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
104 #define RBTDB_RDATATYPE_NCACHEANY \
105                 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
106
107 /*
108  * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
109  * Using rwlock is effective with regard to lookup performance only when
110  * it is implemented in an efficient way.
111  * Otherwise, it is generally wise to stick to the simple locking since rwlock
112  * would require more memory or can even make lookups slower due to its own
113  * overhead (when it internally calls mutex locks).
114  */
115 #ifdef ISC_RWLOCK_USEATOMIC
116 #define DNS_RBTDB_USERWLOCK 1
117 #else
118 #define DNS_RBTDB_USERWLOCK 0
119 #endif
120
121 #if DNS_RBTDB_USERWLOCK
122 #define RBTDB_INITLOCK(l)       isc_rwlock_init((l), 0, 0)
123 #define RBTDB_DESTROYLOCK(l)    isc_rwlock_destroy(l)
124 #define RBTDB_LOCK(l, t)        RWLOCK((l), (t))
125 #define RBTDB_UNLOCK(l, t)      RWUNLOCK((l), (t))
126 #else
127 #define RBTDB_INITLOCK(l)       isc_mutex_init(l)
128 #define RBTDB_DESTROYLOCK(l)    DESTROYLOCK(l)
129 #define RBTDB_LOCK(l, t)        LOCK(l)
130 #define RBTDB_UNLOCK(l, t)      UNLOCK(l)
131 #endif
132
133 /*
134  * Since node locking is sensitive to both performance and memory footprint,
135  * we need some trick here.  If we have both high-performance rwlock and
136  * high performance and small-memory reference counters, we use rwlock for
137  * node lock and isc_refcount for node references.  In this case, we don't have
138  * to protect the access to the counters by locks.
139  * Otherwise, we simply use ordinary mutex lock for node locking, and use
140  * simple integers as reference counters which is protected by the lock.
141  * In most cases, we can simply use wrapper macros such as NODE_LOCK and
142  * NODE_UNLOCK.  In some other cases, however, we need to protect reference
143  * counters first and then protect other parts of a node as read-only data.
144  * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
145  * provided for these special cases.  When we can use the efficient backend
146  * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
147  * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
148  * section including the access to the reference counter.
149  * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
150  * section is also protected by NODE_STRONGLOCK().
151  */
152 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
153 typedef isc_rwlock_t nodelock_t;
154
155 #define NODE_INITLOCK(l)        isc_rwlock_init((l), 0, 0)
156 #define NODE_DESTROYLOCK(l)     isc_rwlock_destroy(l)
157 #define NODE_LOCK(l, t)         RWLOCK((l), (t))
158 #define NODE_UNLOCK(l, t)       RWUNLOCK((l), (t))
159 #define NODE_TRYUPGRADE(l)      isc_rwlock_tryupgrade(l)
160
161 #define NODE_STRONGLOCK(l)      ((void)0)
162 #define NODE_STRONGUNLOCK(l)    ((void)0)
163 #define NODE_WEAKLOCK(l, t)     NODE_LOCK(l, t)
164 #define NODE_WEAKUNLOCK(l, t)   NODE_UNLOCK(l, t)
165 #define NODE_WEAKDOWNGRADE(l)   isc_rwlock_downgrade(l)
166 #else
167 typedef isc_mutex_t nodelock_t;
168
169 #define NODE_INITLOCK(l)        isc_mutex_init(l)
170 #define NODE_DESTROYLOCK(l)     DESTROYLOCK(l)
171 #define NODE_LOCK(l, t)         LOCK(l)
172 #define NODE_UNLOCK(l, t)       UNLOCK(l)
173 #define NODE_TRYUPGRADE(l)      ISC_R_SUCCESS
174
175 #define NODE_STRONGLOCK(l)      LOCK(l)
176 #define NODE_STRONGUNLOCK(l)    UNLOCK(l)
177 #define NODE_WEAKLOCK(l, t)     ((void)0)
178 #define NODE_WEAKUNLOCK(l, t)   ((void)0)
179 #define NODE_WEAKDOWNGRADE(l)   ((void)0)
180 #endif
181
182 #ifndef DNS_RDATASET_FIXED
183 #define DNS_RDATASET_FIXED 1
184 #endif
185
186 /*
187  * Allow clients with a virtual time of up to 5 minutes in the past to see
188  * records that would otherwise have expired.
189  */
190 #define RBTDB_VIRTUAL 300
191
192 struct noqname {
193         dns_name_t name;
194         void *     nsec;
195         void *     nsecsig;
196 };
197
198 typedef struct acachectl acachectl_t;
199
200 typedef struct rdatasetheader {
201         /*%
202          * Locked by the owning node's lock.
203          */
204         rbtdb_serial_t                  serial;
205         dns_ttl_t                       ttl;
206         rbtdb_rdatatype_t               type;
207         isc_uint16_t                    attributes;
208         dns_trust_t                     trust;
209         struct noqname                  *noqname;
210         /*%<
211          * We don't use the LIST macros, because the LIST structure has
212          * both head and tail pointers, and is doubly linked.
213          */
214
215         struct rdatasetheader           *next;
216         /*%<
217          * If this is the top header for an rdataset, 'next' points
218          * to the top header for the next rdataset (i.e., the next type).
219          * Otherwise, it points up to the header whose down pointer points
220          * at this header.
221          */
222
223         struct rdatasetheader           *down;
224         /*%<
225          * Points to the header for the next older version of
226          * this rdataset.
227          */
228
229         isc_uint32_t                    count;
230         /*%<
231          * Monotonically increased every time this rdataset is bound so that
232          * it is used as the base of the starting point in DNS responses
233          * when the "cyclic" rrset-order is required.  Since the ordering
234          * should not be so crucial, no lock is set for the counter for
235          * performance reasons.
236          */
237
238         acachectl_t                     *additional_auth;
239         acachectl_t                     *additional_glue;
240 } rdatasetheader_t;
241
242 #define RDATASET_ATTR_NONEXISTENT       0x0001
243 #define RDATASET_ATTR_STALE             0x0002
244 #define RDATASET_ATTR_IGNORE            0x0004
245 #define RDATASET_ATTR_RETAIN            0x0008
246 #define RDATASET_ATTR_NXDOMAIN          0x0010
247
248 typedef struct acache_cbarg {
249         dns_rdatasetadditional_t        type;
250         unsigned int                    count;
251         dns_db_t                        *db;
252         dns_dbnode_t                    *node;
253         rdatasetheader_t                *header;
254 } acache_cbarg_t;
255
256 struct acachectl {
257         dns_acacheentry_t               *entry;
258         acache_cbarg_t                  *cbarg;
259 };
260
261 /*
262  * XXX
263  * When the cache will pre-expire data (due to memory low or other
264  * situations) before the rdataset's TTL has expired, it MUST
265  * respect the RETAIN bit and not expire the data until its TTL is
266  * expired.
267  */
268
269 #undef IGNORE                   /* WIN32 winbase.h defines this. */
270
271 #define EXISTS(header) \
272         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
273 #define NONEXISTENT(header) \
274         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
275 #define IGNORE(header) \
276         (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
277 #define RETAIN(header) \
278         (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
279 #define NXDOMAIN(header) \
280         (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
281
282 #define DEFAULT_NODE_LOCK_COUNT         7       /*%< Should be prime. */
283 #define DEFAULT_CACHE_NODE_LOCK_COUNT   1009    /*%< Should be prime. */
284
285 typedef struct {
286         nodelock_t                      lock;
287         /* Protected in the refcount routines. */
288         isc_refcount_t                  references;
289         /* Locked by lock. */
290         isc_boolean_t                   exiting;
291 } rbtdb_nodelock_t;
292
293 typedef struct rbtdb_changed {
294         dns_rbtnode_t *                 node;
295         isc_boolean_t                   dirty;
296         ISC_LINK(struct rbtdb_changed)  link;
297 } rbtdb_changed_t;
298
299 typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
300
301 typedef struct rbtdb_version {
302         /* Not locked */
303         rbtdb_serial_t                  serial;
304         /*
305          * Protected in the refcount routines.
306          * XXXJT: should we change the lock policy based on the refcount
307          * performance?
308          */
309         isc_refcount_t                  references;
310         /* Locked by database lock. */
311         isc_boolean_t                   writer;
312         isc_boolean_t                   commit_ok;
313         rbtdb_changedlist_t             changed_list;
314         ISC_LINK(struct rbtdb_version)  link;
315 } rbtdb_version_t;
316
317 typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;
318
319 typedef struct {
320         /* Unlocked. */
321         dns_db_t                        common;
322 #if DNS_RBTDB_USERWLOCK
323         isc_rwlock_t                    lock;
324 #else
325         isc_mutex_t                     lock;
326 #endif
327         isc_rwlock_t                    tree_lock;
328         unsigned int                    node_lock_count;
329         rbtdb_nodelock_t *              node_locks;
330         dns_rbtnode_t *                 origin_node;
331         /* Locked by lock. */
332         unsigned int                    active;
333         isc_refcount_t                  references;
334         unsigned int                    attributes;
335         rbtdb_serial_t                  current_serial;
336         rbtdb_serial_t                  least_serial;
337         rbtdb_serial_t                  next_serial;
338         rbtdb_version_t *               current_version;
339         rbtdb_version_t *               future_version;
340         rbtdb_versionlist_t             open_versions;
341         isc_boolean_t                   overmem;
342         isc_task_t *                    task;
343         dns_dbnode_t                    *soanode;
344         dns_dbnode_t                    *nsnode;
345         /* Locked by tree_lock. */
346         dns_rbt_t *                     tree;
347         isc_boolean_t                   secure;
348
349         /* Unlocked */
350         unsigned int                    quantum;
351 } dns_rbtdb_t;
352
353 #define RBTDB_ATTR_LOADED               0x01
354 #define RBTDB_ATTR_LOADING              0x02
355
356 /*%
357  * Search Context
358  */
359 typedef struct {
360         dns_rbtdb_t *           rbtdb;
361         rbtdb_version_t *       rbtversion;
362         rbtdb_serial_t          serial;
363         unsigned int            options;
364         dns_rbtnodechain_t      chain;
365         isc_boolean_t           copy_name;
366         isc_boolean_t           need_cleanup;
367         isc_boolean_t           wild;
368         dns_rbtnode_t *         zonecut;
369         rdatasetheader_t *      zonecut_rdataset;
370         rdatasetheader_t *      zonecut_sigrdataset;
371         dns_fixedname_t         zonecut_name;
372         isc_stdtime_t           now;
373 } rbtdb_search_t;
374
375 /*%
376  * Load Context
377  */
378 typedef struct {
379         dns_rbtdb_t *           rbtdb;
380         isc_stdtime_t           now;
381 } rbtdb_load_t;
382
383 static void rdataset_disassociate(dns_rdataset_t *rdataset);
384 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
385 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
386 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
387 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
388 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
389 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
390                                         dns_name_t *name,
391                                         dns_rdataset_t *nsec,
392                                         dns_rdataset_t *nsecsig);
393 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
394                                            dns_rdatasetadditional_t type,
395                                            dns_rdatatype_t qtype,
396                                            dns_acache_t *acache,
397                                            dns_zone_t **zonep,
398                                            dns_db_t **dbp,
399                                            dns_dbversion_t **versionp,
400                                            dns_dbnode_t **nodep,
401                                            dns_name_t *fname,
402                                            dns_message_t *msg,
403                                            isc_stdtime_t now);
404 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
405                                            dns_rdatasetadditional_t type,
406                                            dns_rdatatype_t qtype,
407                                            dns_acache_t *acache,
408                                            dns_zone_t *zone,
409                                            dns_db_t *db,
410                                            dns_dbversion_t *version,
411                                            dns_dbnode_t *node,
412                                            dns_name_t *fname);
413 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
414                                            dns_rdataset_t *rdataset,
415                                            dns_rdatasetadditional_t type,
416                                            dns_rdatatype_t qtype);
417
418 static dns_rdatasetmethods_t rdataset_methods = {
419         rdataset_disassociate,
420         rdataset_first,
421         rdataset_next,
422         rdataset_current,
423         rdataset_clone,
424         rdataset_count,
425         NULL,
426         rdataset_getnoqname,
427         rdataset_getadditional,
428         rdataset_setadditional,
429         rdataset_putadditional
430 };
431
432 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
433 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
434 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
435 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
436                                  dns_rdataset_t *rdataset);
437
438 static dns_rdatasetitermethods_t rdatasetiter_methods = {
439         rdatasetiter_destroy,
440         rdatasetiter_first,
441         rdatasetiter_next,
442         rdatasetiter_current
443 };
444
445 typedef struct rbtdb_rdatasetiter {
446         dns_rdatasetiter_t              common;
447         rdatasetheader_t *              current;
448 } rbtdb_rdatasetiter_t;
449
450 static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
451 static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
452 static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
453 static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
454                                         dns_name_t *name);
455 static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
456 static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
457 static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
458                                            dns_dbnode_t **nodep,
459                                            dns_name_t *name);
460 static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
461 static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
462                                           dns_name_t *name);
463
464 static dns_dbiteratormethods_t dbiterator_methods = {
465         dbiterator_destroy,
466         dbiterator_first,
467         dbiterator_last,
468         dbiterator_seek,
469         dbiterator_prev,
470         dbiterator_next,
471         dbiterator_current,
472         dbiterator_pause,
473         dbiterator_origin
474 };
475
476 #define DELETION_BATCH_MAX 64
477
478 /*
479  * If 'paused' is ISC_TRUE, then the tree lock is not being held.
480  */
/*
 * Per-iterator state for walking the nodes of the database's tree.
 * 'common' comes first in the structure.
 */
481 typedef struct rbtdb_dbiterator {
482         dns_dbiterator_t                common;
            /* ISC_TRUE while the tree lock is not being held. */
483         isc_boolean_t                   paused;
484         isc_boolean_t                   new_origin;
            /* NOTE(review): presumably the mode in which the tree lock is
             * currently held by this iterator -- confirm against users. */
485         isc_rwlocktype_t                tree_locked;
486         isc_result_t                    result;
487         dns_fixedname_t                 name;
488         dns_fixedname_t                 origin;
489         dns_rbtnodechain_t              chain;
490         dns_rbtnode_t                   *node;
            /* Room for at most DELETION_BATCH_MAX node pointers. */
491         dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX];
            /* NOTE(review): presumably the number of entries in use in
             * 'deletions' -- confirm against users. */
492         int                             delete;
493 } rbtdb_dbiterator_t;
494
495
496 #define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
497 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
498
499 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
500                        isc_event_t *event);
501
502 /*%
503  * 'init_count' is used to initialize 'newheader->count' which in turn
504  * is used to determine where in the cycle rrset-order cyclic starts.
505  * We don't lock this as we don't care about simultaneous updates.
506  *
507  * Note:
508  *      Both init_count and header->count can be ISC_UINT32_MAX.
509  *      The count on the returned rdataset however can't be as
510  *      that indicates that the database does not implement cyclic
511  *      processing.
512  */
513 static unsigned int init_count;
514
515 /*
516  * Locking
517  *
518  * If a routine is going to lock more than one lock in this module, then
519  * the locking must be done in the following order:
520  *
521  *      Tree Lock
522  *
523  *      Node Lock       (Only one from the set may be locked at one time by
524  *                       any caller)
525  *
526  *      Database Lock
527  *
528  * Failure to follow this hierarchy can result in deadlock.
529  */
530
531 /*
532  * Deleting Nodes
533  *
534  * Currently there is no deletion of nodes from the database, except when
535  * the database is being destroyed.
536  *
537  * If node deletion is added in the future, then for zone databases the node
538  * for the origin of the zone MUST NOT be deleted.
539  */
540
541
542 /*
543  * DB Routines
544  */
545
546 static void
547 attach(dns_db_t *source, dns_db_t **targetp) {
548         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
549
550         REQUIRE(VALID_RBTDB(rbtdb));
551
552         isc_refcount_increment(&rbtdb->references, NULL);
553
554         *targetp = source;
555 }
556
557 static void
558 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
559         dns_rbtdb_t *rbtdb = event->ev_arg;
560
561         UNUSED(task);
562
563         free_rbtdb(rbtdb, ISC_TRUE, event);
564 }
565
566 /*%
567  * Work out how many nodes can be deleted in the time between two
568  * requests to the nameserver.  Smooth the resulting number and use it
569  * as a estimate for the number of nodes to be deleted in the next
570  * iteration.
571  */
572 static unsigned int
573 adjust_quantum(unsigned int old, isc_time_t *start) {
574         unsigned int pps = dns_pps;     /* packets per second */
575         unsigned int interval;
576         isc_uint64_t usecs;
577         isc_time_t end;
578         unsigned int new;
579
580         if (pps < 100)
581                 pps = 100;
582         isc_time_now(&end);
583
584         interval = 1000000 / pps;       /* interval in usec */
585         if (interval == 0)
586                 interval = 1;
587         usecs = isc_time_microdiff(&end, start);
588         if (usecs == 0) {
589                 /*
590                  * We were unable to measure the amount of time taken.
591                  * Double the nodes deleted next time.
592                  */
593                 old *= 2;
594                 if (old > 1000)
595                         old = 1000;
596                 return (old);
597         }
598         new = old * interval;
599         new /= (unsigned int)usecs;
600         if (new == 0)
601                 new = 1;
602         else if (new > 1000)
603                 new = 1000;
604
605         /* Smooth */
606         new = (new + old * 3) / 4;
607
608         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
609                       ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
610
611         return (new);
612 }
613
/*
 * Tear down 'rbtdb' and release its memory.
 *
 * 'log'   - when true, a debug message "done free_rbtdb(<origin>)" is
 *           written once the tree has been destroyed.
 * 'event' - NULL on the initial call.  Non-NULL when the function is
 *           re-entered from free_rbtdb_callback(); the event is then
 *           either sent again or freed here.
 *
 * The tree is destroyed with dns_rbt_destroy2() using rbtdb->quantum.  If
 * that call returns ISC_R_QUOTA, the remaining work is handed to
 * rbtdb->task through a DNS_EVENT_FREESTORAGE event and this function
 * returns without freeing anything else.
 */
614 static void
615 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
616         unsigned int i;
617         isc_ondestroy_t ondest;
618         isc_result_t result;
619         char buf[DNS_NAME_FORMATSIZE];
620         isc_time_t start;
621
622         REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
623         REQUIRE(rbtdb->future_version == NULL);
624
            /*
             * Drop the database's own reference on the current version; it
             * must be the last one.
             * NOTE(review): this block would run again on re-entry from the
             * callback unless isc_mem_put() clears
             * rbtdb->current_version -- confirm.
             */
625         if (rbtdb->current_version != NULL) {
626                 unsigned int refs;
627
628                 isc_refcount_decrement(&rbtdb->current_version->references,
629                                        &refs);
630                 INSIST(refs == 0);
631                 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
632                 isc_refcount_destroy(&rbtdb->current_version->references);
633                 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
634                             sizeof(rbtdb_version_t));
635         }
            /*
             * Initial call only: start with a quantum of 100 when a task is
             * available to continue the work, otherwise 0.
             * NOTE(review): 0 presumably means "no limit" to
             * dns_rbt_destroy2() -- confirm.
             */
636         if (event == NULL)
637                 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
638  again:
639         if (rbtdb->tree != NULL) {
640                 isc_time_now(&start);
641                 result = dns_rbt_destroy2(&rbtdb->tree, rbtdb->quantum);
642                 if (result == ISC_R_QUOTA) {
643                         INSIST(rbtdb->task != NULL);
644                         if (rbtdb->quantum != 0)
645                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
646                                                                 &start);
                            /* Reuse the incoming event if there is one. */
647                         if (event == NULL)
648                                 event = isc_event_allocate(rbtdb->common.mctx,
649                                                            NULL,
650                                                          DNS_EVENT_FREESTORAGE,
651                                                            free_rbtdb_callback,
652                                                            rbtdb,
653                                                            sizeof(isc_event_t));
                            /*
                             * No event could be allocated: keep destroying
                             * the tree from this call instead of deferring.
                             */
654                         if (event == NULL)
655                                 goto again;
656                         isc_task_send(rbtdb->task, &event);
657                         return;
658                 }
659                 INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
660         }
661         if (event != NULL)
662                 isc_event_free(&event);
663         if (log) {
664                 if (dns_name_dynamic(&rbtdb->common.origin))
665                         dns_name_format(&rbtdb->common.origin, buf,
666                                         sizeof(buf));
667                 else
668                         strcpy(buf, "<UNKNOWN>");
669                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
670                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
671                               "done free_rbtdb(%s)", buf);
672         }
673         if (dns_name_dynamic(&rbtdb->common.origin))
674                 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
675         for (i = 0; i < rbtdb->node_lock_count; i++) {
676                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
677                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
678         }
679         isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
680                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
681         isc_rwlock_destroy(&rbtdb->tree_lock);
682         isc_refcount_destroy(&rbtdb->references);
683         if (rbtdb->task != NULL)
684                 isc_task_detach(&rbtdb->task);
685         RBTDB_DESTROYLOCK(&rbtdb->lock);
            /* Invalidate both magic numbers before the memory is released. */
686         rbtdb->common.magic = 0;
687         rbtdb->common.impmagic = 0;
            /*
             * Copy the ondestroy state out first: the structure holding it
             * is released by the next call.
             */
688         ondest = rbtdb->common.ondest;
689         isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
690         isc_ondestroy_notify(&ondest, rbtdb);
691 }
692
693 static inline void
694 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
695         isc_boolean_t want_free = ISC_FALSE;
696         unsigned int i;
697         unsigned int inactive = 0;
698
699         /* XXX check for open versions here */
700
701         if (rbtdb->soanode != NULL)
702                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
703         if (rbtdb->nsnode != NULL)
704                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
705
706         /*
707          * Even though there are no external direct references, there still
708          * may be nodes in use.
709          */
710         for (i = 0; i < rbtdb->node_lock_count; i++) {
711                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
712                 rbtdb->node_locks[i].exiting = ISC_TRUE;
713                 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
714                 if (isc_refcount_current(&rbtdb->node_locks[i].references)
715                     == 0) {
716                         inactive++;
717                 }
718         }
719
720         if (inactive != 0) {
721                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
722                 rbtdb->active -= inactive;
723                 if (rbtdb->active == 0)
724                         want_free = ISC_TRUE;
725                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
726                 if (want_free) {
727                         char buf[DNS_NAME_FORMATSIZE];
728                         if (dns_name_dynamic(&rbtdb->common.origin))
729                                 dns_name_format(&rbtdb->common.origin, buf,
730                                                 sizeof(buf));
731                         else
732                                 strcpy(buf, "<UNKNOWN>");
733                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
734                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
735                                       "calling free_rbtdb(%s)", buf);
736                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
737                 }
738         }
739 }
740
741 static void
742 detach(dns_db_t **dbp) {
743         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
744         unsigned int refs;
745
746         REQUIRE(VALID_RBTDB(rbtdb));
747
748         isc_refcount_decrement(&rbtdb->references, &refs);
749
750         if (refs == 0)
751                 maybe_free_rbtdb(rbtdb);
752
753         *dbp = NULL;
754 }
755
756 static void
757 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
758         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
759         rbtdb_version_t *version;
760         unsigned int refs;
761
762         REQUIRE(VALID_RBTDB(rbtdb));
763
764         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
765         version = rbtdb->current_version;
766         isc_refcount_increment(&version->references, &refs);
767         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
768
769         *versionp = (dns_dbversion_t *)version;
770 }
771
772 static inline rbtdb_version_t *
773 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
774                  unsigned int references, isc_boolean_t writer)
775 {
776         isc_result_t result;
777         rbtdb_version_t *version;
778
779         version = isc_mem_get(mctx, sizeof(*version));
780         if (version == NULL)
781                 return (NULL);
782         version->serial = serial;
783         result = isc_refcount_init(&version->references, references);
784         if (result != ISC_R_SUCCESS) {
785                 isc_mem_put(mctx, version, sizeof(*version));
786                 return (NULL);
787         }
788         version->writer = writer;
789         version->commit_ok = ISC_FALSE;
790         ISC_LIST_INIT(version->changed_list);
791         ISC_LINK_INIT(version, link);
792
793         return (version);
794 }
795
796 static isc_result_t
797 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
798         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
799         rbtdb_version_t *version;
800
801         REQUIRE(VALID_RBTDB(rbtdb));
802         REQUIRE(versionp != NULL && *versionp == NULL);
803         REQUIRE(rbtdb->future_version == NULL);
804
805         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
806         RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
807         version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
808                                    ISC_TRUE);
809         if (version != NULL) {
810                 version->commit_ok = ISC_TRUE;
811                 rbtdb->next_serial++;
812                 rbtdb->future_version = version;
813         }
814         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
815
816         if (version == NULL)
817                 return (ISC_R_NOMEMORY);
818
819         *versionp = version;
820
821         return (ISC_R_SUCCESS);
822 }
823
824 static void
825 attachversion(dns_db_t *db, dns_dbversion_t *source,
826               dns_dbversion_t **targetp)
827 {
828         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
829         rbtdb_version_t *rbtversion = source;
830         unsigned int refs;
831
832         REQUIRE(VALID_RBTDB(rbtdb));
833
834         isc_refcount_increment(&rbtversion->references, &refs);
835         INSIST(refs > 1);
836
837         *targetp = rbtversion;
838 }
839
840 static rbtdb_changed_t *
841 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
842             dns_rbtnode_t *node)
843 {
844         rbtdb_changed_t *changed;
845         unsigned int refs;
846
847         /*
848          * Caller must be holding the node lock if its reference must be
849          * protected by the lock.
850          */
851
852         changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
853
854         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
855
856         REQUIRE(version->writer);
857
858         if (changed != NULL) {
859                 dns_rbtnode_refincrement(node, &refs);
860                 INSIST(refs != 0);
861                 changed->node = node;
862                 changed->dirty = ISC_FALSE;
863                 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
864         } else
865                 version->commit_ok = ISC_FALSE;
866
867         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
868
869         return (changed);
870 }
871
872 static void
873 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
874                  acachectl_t *array)
875 {
876         unsigned int count;
877         unsigned int i;
878         unsigned char *raw;     /* RDATASLAB */
879
880         /*
881          * The caller must be holding the corresponding node lock.
882          */
883
884         if (array == NULL)
885                 return;
886
887         raw = (unsigned char *)header + sizeof(*header);
888         count = raw[0] * 256 + raw[1];
889
890         /*
891          * Sanity check: since an additional cache entry has a reference to
892          * the original DB node (in the callback arg), there should be no
893          * acache entries when the node can be freed.
894          */
895         for (i = 0; i < count; i++)
896                 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
897
898         isc_mem_put(mctx, array, count * sizeof(acachectl_t));
899 }
900
901 static inline void
902 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
903
904         if (dns_name_dynamic(&(*noqname)->name))
905                 dns_name_free(&(*noqname)->name, mctx);
906         if ((*noqname)->nsec != NULL)
907                 isc_mem_put(mctx, (*noqname)->nsec,
908                             dns_rdataslab_size((*noqname)->nsec, 0));
909         if ((*noqname)->nsecsig != NULL)
910                 isc_mem_put(mctx, (*noqname)->nsecsig,
911                             dns_rdataslab_size((*noqname)->nsecsig, 0));
912         isc_mem_put(mctx, *noqname, sizeof(**noqname));
913         *noqname = NULL;
914 }
915
916 static inline void
917 free_rdataset(isc_mem_t *mctx, rdatasetheader_t *rdataset) {
918         unsigned int size;
919
920         if (rdataset->noqname != NULL)
921                 free_noqname(mctx, &rdataset->noqname);
922
923         free_acachearray(mctx, rdataset, rdataset->additional_auth);
924         free_acachearray(mctx, rdataset, rdataset->additional_glue);
925
926         if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
927                 size = sizeof(*rdataset);
928         else
929                 size = dns_rdataslab_size((unsigned char *)rdataset,
930                                           sizeof(*rdataset));
931         isc_mem_put(mctx, rdataset, size);
932 }
933
934 static inline void
935 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
936         rdatasetheader_t *header, *dcurrent;
937         isc_boolean_t make_dirty = ISC_FALSE;
938
939         /*
940          * Caller must hold the node lock.
941          */
942
943         /*
944          * We set the IGNORE attribute on rdatasets with serial number
945          * 'serial'.  When the reference count goes to zero, these rdatasets
946          * will be cleaned up; until that time, they will be ignored.
947          */
948         for (header = node->data; header != NULL; header = header->next) {
949                 if (header->serial == serial) {
950                         header->attributes |= RDATASET_ATTR_IGNORE;
951                         make_dirty = ISC_TRUE;
952                 }
953                 for (dcurrent = header->down;
954                      dcurrent != NULL;
955                      dcurrent = dcurrent->down) {
956                         if (dcurrent->serial == serial) {
957                                 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
958                                 make_dirty = ISC_TRUE;
959                         }
960                 }
961         }
962         if (make_dirty)
963                 node->dirty = 1;
964 }
965
966 static inline void
967 clean_stale_headers(isc_mem_t *mctx, rdatasetheader_t *top) {
968         rdatasetheader_t *d, *down_next;
969
970         for (d = top->down; d != NULL; d = down_next) {
971                 down_next = d->down;
972                 free_rdataset(mctx, d);
973         }
974         top->down = NULL;
975 }
976
977 static inline void
978 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
979         rdatasetheader_t *current, *top_prev, *top_next;
980         isc_mem_t *mctx = rbtdb->common.mctx;
981
982         /*
983          * Caller must be holding the node lock.
984          */
985
986         top_prev = NULL;
987         for (current = node->data; current != NULL; current = top_next) {
988                 top_next = current->next;
989                 clean_stale_headers(mctx, current);
990                 /*
991                  * If current is nonexistent or stale, we can clean it up.
992                  */
993                 if ((current->attributes &
994                      (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
995                         if (top_prev != NULL)
996                                 top_prev->next = current->next;
997                         else
998                                 node->data = current->next;
999                         free_rdataset(mctx, current);
1000                 } else
1001                         top_prev = current;
1002         }
1003         node->dirty = 0;
1004 }
1005
1006 static inline void
1007 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1008                 rbtdb_serial_t least_serial)
1009 {
1010         rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1011         rdatasetheader_t *top_prev, *top_next;
1012         isc_mem_t *mctx = rbtdb->common.mctx;
1013         isc_boolean_t still_dirty = ISC_FALSE;
1014
1015         /*
1016          * Caller must be holding the node lock.
1017          */
1018         REQUIRE(least_serial != 0);
1019
1020         top_prev = NULL;
1021         for (current = node->data; current != NULL; current = top_next) {
1022                 top_next = current->next;
1023
1024                 /*
1025                  * First, we clean up any instances of multiple rdatasets
1026                  * with the same serial number, or that have the IGNORE
1027                  * attribute.
1028                  */
1029                 dparent = current;
1030                 for (dcurrent = current->down;
1031                      dcurrent != NULL;
1032                      dcurrent = down_next) {
1033                         down_next = dcurrent->down;
1034                         INSIST(dcurrent->serial <= dparent->serial);
1035                         if (dcurrent->serial == dparent->serial ||
1036                             IGNORE(dcurrent)) {
1037                                 if (down_next != NULL)
1038                                         down_next->next = dparent;
1039                                 dparent->down = down_next;
1040                                 free_rdataset(mctx, dcurrent);
1041                         } else
1042                                 dparent = dcurrent;
1043                 }
1044
1045                 /*
1046                  * We've now eliminated all IGNORE datasets with the possible
1047                  * exception of current, which we now check.
1048                  */
1049                 if (IGNORE(current)) {
1050                         down_next = current->down;
1051                         if (down_next == NULL) {
1052                                 if (top_prev != NULL)
1053                                         top_prev->next = current->next;
1054                                 else
1055                                         node->data = current->next;
1056                                 free_rdataset(mctx, current);
1057                                 /*
1058                                  * current no longer exists, so we can
1059                                  * just continue with the loop.
1060                                  */
1061                                 continue;
1062                         } else {
1063                                 /*
1064                                  * Pull up current->down, making it the new
1065                                  * current.
1066                                  */
1067                                 if (top_prev != NULL)
1068                                         top_prev->next = down_next;
1069                                 else
1070                                         node->data = down_next;
1071                                 down_next->next = top_next;
1072                                 free_rdataset(mctx, current);
1073                                 current = down_next;
1074                         }
1075                 }
1076
1077                 /*
1078                  * We now try to find the first down node less than the
1079                  * least serial.
1080                  */
1081                 dparent = current;
1082                 for (dcurrent = current->down;
1083                      dcurrent != NULL;
1084                      dcurrent = down_next) {
1085                         down_next = dcurrent->down;
1086                         if (dcurrent->serial < least_serial)
1087                                 break;
1088                         dparent = dcurrent;
1089                 }
1090
1091                 /*
1092                  * If there is a such an rdataset, delete it and any older
1093                  * versions.
1094                  */
1095                 if (dcurrent != NULL) {
1096                         do {
1097                                 down_next = dcurrent->down;
1098                                 INSIST(dcurrent->serial <= least_serial);
1099                                 free_rdataset(mctx, dcurrent);
1100                                 dcurrent = down_next;
1101                         } while (dcurrent != NULL);
1102                         dparent->down = NULL;
1103                 }
1104
1105                 /*
1106                  * Note.  The serial number of 'current' might be less than
1107                  * least_serial too, but we cannot delete it because it is
1108                  * the most recent version, unless it is a NONEXISTENT
1109                  * rdataset.
1110                  */
1111                 if (current->down != NULL) {
1112                         still_dirty = ISC_TRUE;
1113                         top_prev = current;
1114                 } else {
1115                         /*
1116                          * If this is a NONEXISTENT rdataset, we can delete it.
1117                          */
1118                         if (NONEXISTENT(current)) {
1119                                 if (top_prev != NULL)
1120                                         top_prev->next = current->next;
1121                                 else
1122                                         node->data = current->next;
1123                                 free_rdataset(mctx, current);
1124                         } else
1125                                 top_prev = current;
1126                 }
1127         }
1128         if (!still_dirty)
1129                 node->dirty = 0;
1130 }
1131
1132 /*
1133  * Caller must be holding the node lock if its reference must be protected
1134  * by the lock.
1135  */
1136 static inline void
1137 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1138         unsigned int lockrefs, noderefs;
1139         isc_refcount_t *lockref;
1140
1141         dns_rbtnode_refincrement0(node, &noderefs);
1142         if (noderefs == 1) {    /* this is the first reference to the node */
1143                 lockref = &rbtdb->node_locks[node->locknum].references;
1144                 isc_refcount_increment0(lockref, &lockrefs);
1145                 INSIST(lockrefs != 0);
1146         }
1147         INSIST(noderefs != 0);
1148 }
1149
1150 /*
1151  * Caller must be holding the node lock; either the "strong", read or write
1152  * lock.  Note that the lock must be held even when node references are
1153  * atomically modified; in that case the decrement operation itself does not
1154  * have to be protected, but we must avoid a race condition where multiple
1155  * threads are decreasing the reference to zero simultaneously and at least
1156  * one of them is going to free the node.
1157  * This function returns ISC_TRUE if and only if the node reference decreases
1158  * to zero.
1159  */
1160 static isc_boolean_t
1161 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1162                     rbtdb_serial_t least_serial,
1163                     isc_rwlocktype_t nlock, isc_rwlocktype_t tlock)
1164 {
1165         isc_result_t result;
1166         isc_boolean_t write_locked;
1167         rbtdb_nodelock_t *nodelock;
1168         unsigned int refs, nrefs;
1169
1170         nodelock = &rbtdb->node_locks[node->locknum];
1171
1172         /* Handle easy and typical case first. */
1173         if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1174                 dns_rbtnode_refdecrement(node, &nrefs);
1175                 INSIST((int)nrefs >= 0);
1176                 if (nrefs == 0) {
1177                         isc_refcount_decrement(&nodelock->references, &refs);
1178                         INSIST((int)refs >= 0);
1179                 }
1180                 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1181         }
1182
1183         /* Upgrade the lock? */
1184         if (nlock == isc_rwlocktype_read) {
1185                 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1186                 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1187         }
1188         dns_rbtnode_refdecrement(node, &nrefs);
1189         INSIST((int)nrefs >= 0);
1190         if (nrefs > 0) {
1191                 /* Restore the lock? */
1192                 if (nlock == isc_rwlocktype_read)
1193                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1194                 return (ISC_FALSE);
1195         }
1196
1197         if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1198                 if (IS_CACHE(rbtdb))
1199                         clean_cache_node(rbtdb, node);
1200                 else {
1201                         if (least_serial == 0) {
1202                                 /*
1203                                  * Caller doesn't know the least serial.
1204                                  * Get it.
1205                                  */
1206                                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1207                                 least_serial = rbtdb->least_serial;
1208                                 RBTDB_UNLOCK(&rbtdb->lock,
1209                                              isc_rwlocktype_read);
1210                         }
1211                         clean_zone_node(rbtdb, node, least_serial);
1212                 }
1213         }
1214
1215         isc_refcount_decrement(&nodelock->references, &refs);
1216         INSIST((int)refs >= 0);
1217
1218         /*
1219          * XXXDCL should this only be done for cache zones?
1220          */
1221         if (node->data != NULL || node->down != NULL) {
1222                 /* Restore the lock? */
1223                 if (nlock == isc_rwlocktype_read)
1224                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1225                 return (ISC_TRUE);
1226         }
1227
1228         /*
1229          * XXXDCL need to add a deferred delete method for ISC_R_LOCKBUSY.
1230          */
1231         if (tlock != isc_rwlocktype_write) {
1232                 /*
1233                  * Locking hierarchy notwithstanding, we don't need to free
1234                  * the node lock before acquiring the tree write lock because
1235                  * we only do a trylock.
1236                  */
1237                 if (tlock == isc_rwlocktype_read)
1238                         result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1239                 else
1240                         result = isc_rwlock_trylock(&rbtdb->tree_lock,
1241                                                     isc_rwlocktype_write);
1242                 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1243                               result == ISC_R_LOCKBUSY);
1244
1245                 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1246         } else
1247                 write_locked = ISC_TRUE;
1248
1249         if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1250                 /*
1251                  * We can now delete the node if the reference counter is
1252                  * zero.  This should be typically the case, but a different
1253                  * thread may still gain a (new) reference just before the
1254                  * current thread locks the tree (e.g., in findnode()).
1255                  */
1256
1257                 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1258                         char printname[DNS_NAME_FORMATSIZE];
1259
1260                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1261                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1262                                       "decrement_reference: "
1263                                       "delete from rbt: %p %s",
1264                                       node,
1265                                       dns_rbt_formatnodename(node, printname,
1266                                                            sizeof(printname)));
1267                 }
1268
1269                 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1270                 if (result != ISC_R_SUCCESS)
1271                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1272                                       DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1273                                       "decrement_reference: "
1274                                       "dns_rbt_deletenode: %s",
1275                                       isc_result_totext(result));
1276         }
1277
1278         /* Restore the lock? */
1279         if (nlock == isc_rwlocktype_read)
1280                 NODE_WEAKDOWNGRADE(&nodelock->lock);
1281
1282         /*
1283          * Relock a read lock, or unlock the write lock if no lock was held.
1284          */
1285         if (tlock == isc_rwlocktype_none)
1286                 if (write_locked)
1287                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1288
1289         if (tlock == isc_rwlocktype_read)
1290                 if (write_locked)
1291                         isc_rwlock_downgrade(&rbtdb->tree_lock);
1292
1293         return (ISC_TRUE);
1294 }
1295
1296 static inline void
1297 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1298                    rbtdb_changedlist_t *cleanup_list)
1299 {
1300         /*
1301          * Caller must be holding the database lock.
1302          */
1303
1304         rbtdb->least_serial = version->serial;
1305         *cleanup_list = version->changed_list;
1306         ISC_LIST_INIT(version->changed_list);
1307 }
1308
1309 static inline void
1310 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1311         rbtdb_changed_t *changed, *next_changed;
1312
1313         /*
1314          * If the changed record is dirty, then
1315          * an update created multiple versions of
1316          * a given rdataset.  We keep this list
1317          * until we're the least open version, at
1318          * which point it's safe to get rid of any
1319          * older versions.
1320          *
1321          * If the changed record isn't dirty, then
1322          * we don't need it anymore since we're
1323          * committing and not rolling back.
1324          *
1325          * The caller must be holding the database lock.
1326          */
1327         for (changed = HEAD(version->changed_list);
1328              changed != NULL;
1329              changed = next_changed) {
1330                 next_changed = NEXT(changed, link);
1331                 if (!changed->dirty) {
1332                         UNLINK(version->changed_list,
1333                                changed, link);
1334                         APPEND(*cleanup_list,
1335                                changed, link);
1336                 }
1337         }
1338 }
1339
1340 static isc_boolean_t
1341 iszonesecure(dns_db_t *db, dns_dbnode_t *origin) {
1342         dns_rdataset_t keyset;
1343         dns_rdataset_t nsecset, signsecset;
1344         isc_boolean_t haszonekey = ISC_FALSE;
1345         isc_boolean_t hasnsec = ISC_FALSE;
1346         isc_result_t result;
1347
1348         dns_rdataset_init(&keyset);
1349         result = dns_db_findrdataset(db, origin, NULL, dns_rdatatype_dnskey, 0,
1350                                      0, &keyset, NULL);
1351         if (result == ISC_R_SUCCESS) {
1352                 dns_rdata_t keyrdata = DNS_RDATA_INIT;
1353                 result = dns_rdataset_first(&keyset);
1354                 while (result == ISC_R_SUCCESS) {
1355                         dns_rdataset_current(&keyset, &keyrdata);
1356                         if (dns_zonekey_iszonekey(&keyrdata)) {
1357                                 haszonekey = ISC_TRUE;
1358                                 break;
1359                         }
1360                         result = dns_rdataset_next(&keyset);
1361                 }
1362                 dns_rdataset_disassociate(&keyset);
1363         }
1364         if (!haszonekey)
1365                 return (ISC_FALSE);
1366
1367         dns_rdataset_init(&nsecset);
1368         dns_rdataset_init(&signsecset);
1369         result = dns_db_findrdataset(db, origin, NULL, dns_rdatatype_nsec, 0,
1370                                      0, &nsecset, &signsecset);
1371         if (result == ISC_R_SUCCESS) {
1372                 if (dns_rdataset_isassociated(&signsecset)) {
1373                         hasnsec = ISC_TRUE;
1374                         dns_rdataset_disassociate(&signsecset);
1375                 }
1376                 dns_rdataset_disassociate(&nsecset);
1377         }
1378         return (hasnsec);
1379 }
1380
1381 static void
1382 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
1383         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1384         rbtdb_version_t *version, *cleanup_version, *least_greater;
1385         isc_boolean_t rollback = ISC_FALSE;
1386         rbtdb_changedlist_t cleanup_list;
1387         rbtdb_changed_t *changed, *next_changed;
1388         rbtdb_serial_t serial, least_serial;
1389         dns_rbtnode_t *rbtnode;
1390         unsigned int refs;
1391         isc_boolean_t writer;
1392
1393         REQUIRE(VALID_RBTDB(rbtdb));
1394         version = (rbtdb_version_t *)*versionp;
1395
1396         cleanup_version = NULL;
1397         ISC_LIST_INIT(cleanup_list);
1398
1399         isc_refcount_decrement(&version->references, &refs);
1400         if (refs > 0) {         /* typical and easy case first */
1401                 if (commit) {
1402                         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1403                         INSIST(!version->writer);
1404                         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1405                 }
1406                 goto end;
1407         }
1408
1409         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1410         serial = version->serial;
1411         writer = version->writer;
1412         if (version->writer) {
1413                 if (commit) {
1414                         unsigned cur_ref;
1415                         rbtdb_version_t *cur_version;
1416
1417                         INSIST(version->commit_ok);
1418                         INSIST(version == rbtdb->future_version);
1419                         /*
1420                          * The current version is going to be replaced.
1421                          * Release the (likely last) reference to it from the
1422                          * DB itself and unlink it from the open list.
1423                          */
1424                         cur_version = rbtdb->current_version;
1425                         isc_refcount_decrement(&cur_version->references,
1426                                                &cur_ref);
1427                         if (cur_ref == 0) {
1428                                 if (cur_version->serial == rbtdb->least_serial)
1429                                         INSIST(EMPTY(cur_version->changed_list));
1430                                 UNLINK(rbtdb->open_versions,
1431                                        cur_version, link);
1432                         }
1433                         if (EMPTY(rbtdb->open_versions)) {
1434                                 /*
1435                                  * We're going to become the least open
1436                                  * version.
1437                                  */
1438                                 make_least_version(rbtdb, version,
1439                                                    &cleanup_list);
1440                         } else {
1441                                 /*
1442                                  * Some other open version is the
1443                                  * least version.  We can't cleanup
1444                                  * records that were changed in this
1445                                  * version because the older versions
1446                                  * may still be in use by an open
1447                                  * version.
1448                                  *
1449                                  * We can, however, discard the
1450                                  * changed records for things that
1451                                  * we've added that didn't exist in
1452                                  * prior versions.
1453                                  */
1454                                 cleanup_nondirty(version, &cleanup_list);
1455                         }
1456                         /*
1457                          * If the (soon to be former) current version
1458                          * isn't being used by anyone, we can clean
1459                          * it up.
1460                          */
1461                         if (cur_ref == 0) {
1462                                 cleanup_version = cur_version;
1463                                 APPENDLIST(version->changed_list,
1464                                            cleanup_version->changed_list,
1465                                            link);
1466                         }
1467                         /*
1468                          * Become the current version.
1469                          */
1470                         version->writer = ISC_FALSE;
1471                         rbtdb->current_version = version;
1472                         rbtdb->current_serial = version->serial;
1473                         rbtdb->future_version = NULL;
1474
1475                         /*
1476                          * Keep the current version in the open list, and
1477                          * gain a reference for the DB itself (see the DB
1478                          * creation function below).  This must be the only
1479                          * case where we need to increment the counter from
1480                          * zero and need to use isc_refcount_increment0().
1481                          */
1482                         isc_refcount_increment0(&version->references,
1483                                                 &cur_ref);
1484                         INSIST(cur_ref == 1);
1485                         PREPEND(rbtdb->open_versions,
1486                                 rbtdb->current_version, link);
1487                 } else {
1488                         /*
1489                          * We're rolling back this transaction.
1490                          */
1491                         cleanup_list = version->changed_list;
1492                         ISC_LIST_INIT(version->changed_list);
1493                         rollback = ISC_TRUE;
1494                         cleanup_version = version;
1495                         rbtdb->future_version = NULL;
1496                 }
1497         } else {
1498                 if (version != rbtdb->current_version) {
1499                         /*
1500                          * There are no external or internal references
1501                          * to this version and it can be cleaned up.
1502                          */
1503                         cleanup_version = version;
1504
1505                         /*
1506                          * Find the version with the least serial
1507                          * number greater than ours.
1508                          */
1509                         least_greater = PREV(version, link);
1510                         if (least_greater == NULL)
1511                                 least_greater = rbtdb->current_version;
1512
1513                         INSIST(version->serial < least_greater->serial);
1514                         /*
1515                          * Is this the least open version?
1516                          */
1517                         if (version->serial == rbtdb->least_serial) {
1518                                 /*
1519                                  * Yes.  Install the new least open
1520                                  * version.
1521                                  */
1522                                 make_least_version(rbtdb,
1523                                                    least_greater,
1524                                                    &cleanup_list);
1525                         } else {
1526                                 /*
1527                                  * Add any unexecuted cleanups to
1528                                  * those of the least greater version.
1529                                  */
1530                                 APPENDLIST(least_greater->changed_list,
1531                                            version->changed_list,
1532                                            link);
1533                         }
1534                 } else if (version->serial == rbtdb->least_serial)
1535                         INSIST(EMPTY(version->changed_list));
1536                 UNLINK(rbtdb->open_versions, version, link);
1537         }
1538         least_serial = rbtdb->least_serial;
1539         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1540
1541         /*
1542          * Update the zone's secure status.
1543          */
1544         if (writer && commit && !IS_CACHE(rbtdb))
1545                 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
1546
1547         if (cleanup_version != NULL) {
1548                 INSIST(EMPTY(cleanup_version->changed_list));
1549                 isc_mem_put(rbtdb->common.mctx, cleanup_version,
1550                             sizeof(*cleanup_version));
1551         }
1552
1553         if (!EMPTY(cleanup_list)) {
1554                 for (changed = HEAD(cleanup_list);
1555                      changed != NULL;
1556                      changed = next_changed) {
1557                         nodelock_t *lock;
1558
1559                         next_changed = NEXT(changed, link);
1560                         rbtnode = changed->node;
1561                         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
1562
1563                         NODE_LOCK(lock, isc_rwlocktype_write);
1564                         if (rollback)
1565                                 rollback_node(rbtnode, serial);
1566                         decrement_reference(rbtdb, rbtnode, least_serial,
1567                                             isc_rwlocktype_write,
1568                                             isc_rwlocktype_none);
1569                         NODE_UNLOCK(lock, isc_rwlocktype_write);
1570
1571                         isc_mem_put(rbtdb->common.mctx, changed,
1572                                     sizeof(*changed));
1573                 }
1574         }
1575
1576   end:
1577         *versionp = NULL;
1578 }
1579
1580 /*
1581  * Add the necessary magic for the wildcard name 'name'
1582  * to be found in 'rbtdb'.
1583  *
1584  * In order for wildcard matching to work correctly in
1585  * zone_find(), we must ensure that a node for the wildcarding
1586  * level exists in the database, and has its 'find_callback'
1587  * and 'wild' bits set.
1588  *
1589  * E.g. if the wildcard name is "*.sub.example." then we
1590  * must ensure that "sub.example." exists and is marked as
1591  * a wildcard level.
1592  */
1593 static isc_result_t
1594 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
1595         isc_result_t result;
1596         dns_name_t foundname;
1597         dns_offsets_t offsets;
1598         unsigned int n;
1599         dns_rbtnode_t *node = NULL;
1600
1601         dns_name_init(&foundname, offsets);
1602         n = dns_name_countlabels(name);
1603         INSIST(n >= 2);
1604         n--;
1605         dns_name_getlabelsequence(name, 1, n, &foundname);
1606         result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
1607         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
1608                 return (result);
1609         node->find_callback = 1;
1610         node->wild = 1;
1611         return (ISC_R_SUCCESS);
1612 }
1613
1614 static isc_result_t
1615 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
1616         isc_result_t result;
1617         dns_name_t foundname;
1618         dns_offsets_t offsets;
1619         unsigned int n, l, i;
1620
1621         dns_name_init(&foundname, offsets);
1622         n = dns_name_countlabels(name);
1623         l = dns_name_countlabels(&rbtdb->common.origin);
1624         i = l + 1;
1625         while (i < n) {
1626                 dns_rbtnode_t *node = NULL;     /* dummy */
1627                 dns_name_getlabelsequence(name, n - i, i, &foundname);
1628                 if (dns_name_iswildcard(&foundname)) {
1629                         result = add_wildcard_magic(rbtdb, &foundname);
1630                         if (result != ISC_R_SUCCESS)
1631                                 return (result);
1632                         result = dns_rbt_addnode(rbtdb->tree, &foundname,
1633                                                  &node);
1634                         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
1635                                 return (result);
1636                 }
1637                 i++;
1638         }
1639         return (ISC_R_SUCCESS);
1640 }
1641
1642 static isc_result_t
1643 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
1644          dns_dbnode_t **nodep)
1645 {
1646         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1647         dns_rbtnode_t *node = NULL;
1648         dns_name_t nodename;
1649         isc_result_t result;
1650         isc_rwlocktype_t locktype = isc_rwlocktype_read;
1651
1652         REQUIRE(VALID_RBTDB(rbtdb));
1653
1654         dns_name_init(&nodename, NULL);
1655         RWLOCK(&rbtdb->tree_lock, locktype);
1656         result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
1657                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
1658         if (result != ISC_R_SUCCESS) {
1659                 RWUNLOCK(&rbtdb->tree_lock, locktype);
1660                 if (!create) {
1661                         if (result == DNS_R_PARTIALMATCH)
1662                                 result = ISC_R_NOTFOUND;
1663                         return (result);
1664                 }
1665                 /*
1666                  * It would be nice to try to upgrade the lock instead of
1667                  * unlocking then relocking.
1668                  */
1669                 locktype = isc_rwlocktype_write;
1670                 RWLOCK(&rbtdb->tree_lock, locktype);
1671                 node = NULL;
1672                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
1673                 if (result == ISC_R_SUCCESS) {
1674                         dns_rbt_namefromnode(node, &nodename);
1675 #ifdef DNS_RBT_USEHASH
1676                         node->locknum = node->hashval % rbtdb->node_lock_count;
1677 #else
1678                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
1679                                 rbtdb->node_lock_count;
1680 #endif
1681                         add_empty_wildcards(rbtdb, name);
1682
1683                         if (dns_name_iswildcard(name)) {
1684                                 result = add_wildcard_magic(rbtdb, name);
1685                                 if (result != ISC_R_SUCCESS) {
1686                                         RWUNLOCK(&rbtdb->tree_lock, locktype);
1687                                         return (result);
1688                                 }
1689                         }
1690                 } else if (result != ISC_R_EXISTS) {
1691                         RWUNLOCK(&rbtdb->tree_lock, locktype);
1692                         return (result);
1693                 }
1694         }
1695         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1696         new_reference(rbtdb, node);
1697         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1698         RWUNLOCK(&rbtdb->tree_lock, locktype);
1699
1700         *nodep = (dns_dbnode_t *)node;
1701
1702         return (ISC_R_SUCCESS);
1703 }
1704
1705 static isc_result_t
1706 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
1707         rbtdb_search_t *search = arg;
1708         rdatasetheader_t *header, *header_next;
1709         rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
1710         rdatasetheader_t *found;
1711         isc_result_t result;
1712         dns_rbtnode_t *onode;
1713
1714         /*
1715          * We only want to remember the topmost zone cut, since it's the one
1716          * that counts, so we'll just continue if we've already found a
1717          * zonecut.
1718          */
1719         if (search->zonecut != NULL)
1720                 return (DNS_R_CONTINUE);
1721
1722         found = NULL;
1723         result = DNS_R_CONTINUE;
1724         onode = search->rbtdb->origin_node;
1725
1726         NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
1727                   isc_rwlocktype_read);
1728
1729         /*
1730          * Look for an NS or DNAME rdataset active in our version.
1731          */
1732         ns_header = NULL;
1733         dname_header = NULL;
1734         sigdname_header = NULL;
1735         for (header = node->data; header != NULL; header = header_next) {
1736                 header_next = header->next;
1737                 if (header->type == dns_rdatatype_ns ||
1738                     header->type == dns_rdatatype_dname ||
1739                     header->type == RBTDB_RDATATYPE_SIGDNAME) {
1740                         do {
1741                                 if (header->serial <= search->serial &&
1742                                     !IGNORE(header)) {
1743                                         /*
1744                                          * Is this a "this rdataset doesn't
1745                                          * exist" record?
1746                                          */
1747                                         if (NONEXISTENT(header))
1748                                                 header = NULL;
1749                                         break;
1750                                 } else
1751                                         header = header->down;
1752                         } while (header != NULL);
1753                         if (header != NULL) {
1754                                 if (header->type == dns_rdatatype_dname)
1755                                         dname_header = header;
1756                                 else if (header->type ==
1757                                            RBTDB_RDATATYPE_SIGDNAME)
1758                                         sigdname_header = header;
1759                                 else if (node != onode ||
1760                                          IS_STUB(search->rbtdb)) {
1761                                         /*
1762                                          * We've found an NS rdataset that
1763                                          * isn't at the origin node.  We check
1764                                          * that they're not at the origin node,
1765                                          * because otherwise we'd erroneously
1766                                          * treat the zone top as if it were
1767                                          * a delegation.
1768                                          */
1769                                         ns_header = header;
1770                                 }
1771                         }
1772                 }
1773         }
1774
1775         /*
1776          * Did we find anything?
1777          */
1778         if (dname_header != NULL) {
1779                 /*
1780                  * Note that DNAME has precedence over NS if both exist.
1781                  */
1782                 found = dname_header;
1783                 search->zonecut_sigrdataset = sigdname_header;
1784         } else if (ns_header != NULL) {
1785                 found = ns_header;
1786                 search->zonecut_sigrdataset = NULL;
1787         }
1788
1789         if (found != NULL) {
1790                 /*
1791                  * We increment the reference count on node to ensure that
1792                  * search->zonecut_rdataset will still be valid later.
1793                  */
1794                 new_reference(search->rbtdb, node);
1795                 search->zonecut = node;
1796                 search->zonecut_rdataset = found;
1797                 search->need_cleanup = ISC_TRUE;
1798                 /*
1799                  * Since we've found a zonecut, anything beneath it is
1800                  * glue and is not subject to wildcard matching, so we
1801                  * may clear search->wild.
1802                  */
1803                 search->wild = ISC_FALSE;
1804                 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
1805                         /*
1806                          * If the caller does not want to find glue, then
1807                          * this is the best answer and the search should
1808                          * stop now.
1809                          */
1810                         result = DNS_R_PARTIALMATCH;
1811                 } else {
1812                         dns_name_t *zcname;
1813
1814                         /*
1815                          * The search will continue beneath the zone cut.
1816                          * This may or may not be the best match.  In case it
1817                          * is, we need to remember the node name.
1818                          */
1819                         zcname = dns_fixedname_name(&search->zonecut_name);
1820                         RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
1821                                       ISC_R_SUCCESS);
1822                         search->copy_name = ISC_TRUE;
1823                 }
1824         } else {
1825                 /*
1826                  * There is no zonecut at this node which is active in this
1827                  * version.
1828                  *
1829                  * If this is a "wild" node and the caller hasn't disabled
1830                  * wildcard matching, remember that we've seen a wild node
1831                  * in case we need to go searching for wildcard matches
1832                  * later on.
1833                  */
1834                 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
1835                         search->wild = ISC_TRUE;
1836         }
1837
1838         NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
1839                     isc_rwlocktype_read);
1840
1841         return (result);
1842 }
1843
1844 static inline void
1845 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1846               rdatasetheader_t *header, isc_stdtime_t now,
1847               dns_rdataset_t *rdataset)
1848 {
1849         unsigned char *raw;     /* RDATASLAB */
1850
1851         /*
1852          * Caller must be holding the node reader lock.
1853          * XXXJT: technically, we need a writer lock, since we'll increment
1854          * the header count below.  However, since the actual counter value
1855          * doesn't matter, we prioritize performance here.  (We may want to
1856          * use atomic increment when available).
1857          */
1858
1859         if (rdataset == NULL)
1860                 return;
1861
1862         new_reference(rbtdb, node);
1863
1864         INSIST(rdataset->methods == NULL);      /* We must be disassociated. */
1865
1866         rdataset->methods = &rdataset_methods;
1867         rdataset->rdclass = rbtdb->common.rdclass;
1868         rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
1869         rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
1870         rdataset->ttl = header->ttl - now;
1871         rdataset->trust = header->trust;
1872         if (NXDOMAIN(header))
1873                 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
1874         rdataset->private1 = rbtdb;
1875         rdataset->private2 = node;
1876         raw = (unsigned char *)header + sizeof(*header);
1877         rdataset->private3 = raw;
1878         rdataset->count = header->count++;
1879         if (rdataset->count == ISC_UINT32_MAX)
1880                 rdataset->count = 0;
1881
1882         /*
1883          * Reset iterator state.
1884          */
1885         rdataset->privateuint4 = 0;
1886         rdataset->private5 = NULL;
1887
1888         /*
1889          * Add noqname proof.
1890          */
1891         rdataset->private6 = header->noqname;
1892         if (rdataset->private6 != NULL)
1893                 rdataset->attributes |=  DNS_RDATASETATTR_NOQNAME;
1894 }
1895
1896 static inline isc_result_t
1897 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
1898                  dns_name_t *foundname, dns_rdataset_t *rdataset,
1899                  dns_rdataset_t *sigrdataset)
1900 {
1901         isc_result_t result;
1902         dns_name_t *zcname;
1903         rbtdb_rdatatype_t type;
1904         dns_rbtnode_t *node;
1905
1906         /*
1907          * The caller MUST NOT be holding any node locks.
1908          */
1909
1910         node = search->zonecut;
1911         type = search->zonecut_rdataset->type;
1912
1913         /*
1914          * If we have to set foundname, we do it before anything else.
1915          * If we were to set foundname after we had set nodep or bound the
1916          * rdataset, then we'd have to undo that work if dns_name_copy()
1917          * failed.  By setting foundname first, there's nothing to undo if
1918          * we have trouble.
1919          */
1920         if (foundname != NULL && search->copy_name) {
1921                 zcname = dns_fixedname_name(&search->zonecut_name);
1922                 result = dns_name_copy(zcname, foundname, NULL);
1923                 if (result != ISC_R_SUCCESS)
1924                         return (result);
1925         }
1926         if (nodep != NULL) {
1927                 /*
1928                  * Note that we don't have to increment the node's reference
1929                  * count here because we're going to use the reference we
1930                  * already have in the search block.
1931                  */
1932                 *nodep = node;
1933                 search->need_cleanup = ISC_FALSE;
1934         }
1935         if (rdataset != NULL) {
1936                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
1937                           isc_rwlocktype_read);
1938                 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
1939                               search->now, rdataset);
1940                 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
1941                         bind_rdataset(search->rbtdb, node,
1942                                       search->zonecut_sigrdataset,
1943                                       search->now, sigrdataset);
1944                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
1945                             isc_rwlocktype_read);
1946         }
1947
1948         if (type == dns_rdatatype_dname)
1949                 return (DNS_R_DNAME);
1950         return (DNS_R_DELEGATION);
1951 }
1952
1953 static inline isc_boolean_t
1954 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
1955            dns_rbtnode_t *node)
1956 {
1957         unsigned char *raw;     /* RDATASLAB */
1958         unsigned int count, size;
1959         dns_name_t ns_name;
1960         isc_boolean_t valid = ISC_FALSE;
1961         dns_offsets_t offsets;
1962         isc_region_t region;
1963         rdatasetheader_t *header;
1964
1965         /*
1966          * No additional locking is required.
1967          */
1968
1969         /*
1970          * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
1971          * if it occurs at a zone cut, but is not valid below it.
1972          */
1973         if (type == dns_rdatatype_ns) {
1974                 if (node != search->zonecut) {
1975                         return (ISC_FALSE);
1976                 }
1977         } else if (type != dns_rdatatype_a &&
1978                    type != dns_rdatatype_aaaa &&
1979                    type != dns_rdatatype_a6) {
1980                 return (ISC_FALSE);
1981         }
1982
1983         header = search->zonecut_rdataset;
1984         raw = (unsigned char *)header + sizeof(*header);
1985         count = raw[0] * 256 + raw[1];
1986 #if DNS_RDATASET_FIXED
1987         raw += 2 + (4 * count);
1988 #else
1989         raw += 2;
1990 #endif
1991
1992         while (count > 0) {
1993                 count--;
1994                 size = raw[0] * 256 + raw[1];
1995 #if DNS_RDATASET_FIXED
1996                 raw += 4;
1997 #else
1998                 raw += 2;
1999 #endif
2000                 region.base = raw;
2001                 region.length = size;
2002                 raw += size;
2003                 /*
2004                  * XXX Until we have rdata structures, we have no choice but
2005                  * to directly access the rdata format.
2006                  */
2007                 dns_name_init(&ns_name, offsets);
2008                 dns_name_fromregion(&ns_name, &region);
2009                 if (dns_name_compare(&ns_name, name) == 0) {
2010                         valid = ISC_TRUE;
2011                         break;
2012                 }
2013         }
2014
2015         return (valid);
2016 }
2017
2018 static inline isc_boolean_t
2019 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2020             dns_name_t *name)
2021 {
2022         dns_fixedname_t fnext;
2023         dns_fixedname_t forigin;
2024         dns_name_t *next;
2025         dns_name_t *origin;
2026         dns_name_t prefix;
2027         dns_rbtdb_t *rbtdb;
2028         dns_rbtnode_t *node;
2029         isc_result_t result;
2030         isc_boolean_t answer = ISC_FALSE;
2031         rdatasetheader_t *header;
2032
2033         rbtdb = search->rbtdb;
2034
2035         dns_name_init(&prefix, NULL);
2036         dns_fixedname_init(&fnext);
2037         next = dns_fixedname_name(&fnext);
2038         dns_fixedname_init(&forigin);
2039         origin = dns_fixedname_name(&forigin);
2040
2041         result = dns_rbtnodechain_next(chain, NULL, NULL);
2042         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2043                 node = NULL;
2044                 result = dns_rbtnodechain_current(chain, &prefix,
2045                                                   origin, &node);
2046                 if (result != ISC_R_SUCCESS)
2047                         break;
2048                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2049                           isc_rwlocktype_read);
2050                 for (header = node->data;
2051                      header != NULL;
2052                      header = header->next) {
2053                         if (header->serial <= search->serial &&
2054                             !IGNORE(header) && EXISTS(header))
2055                                 break;
2056                 }
2057                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2058                             isc_rwlocktype_read);
2059                 if (header != NULL)
2060                         break;
2061                 result = dns_rbtnodechain_next(chain, NULL, NULL);
2062         }
2063         if (result == ISC_R_SUCCESS)
2064                 result = dns_name_concatenate(&prefix, origin, next, NULL);
2065         if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
2066                 answer = ISC_TRUE;
2067         return (answer);
2068 }
2069
2070 static inline isc_boolean_t
2071 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2072         dns_fixedname_t fnext;
2073         dns_fixedname_t forigin;
2074         dns_fixedname_t fprev;
2075         dns_name_t *next;
2076         dns_name_t *origin;
2077         dns_name_t *prev;
2078         dns_name_t name;
2079         dns_name_t rname;
2080         dns_name_t tname;
2081         dns_rbtdb_t *rbtdb;
2082         dns_rbtnode_t *node;
2083         dns_rbtnodechain_t chain;
2084         isc_boolean_t check_next = ISC_TRUE;
2085         isc_boolean_t check_prev = ISC_TRUE;
2086         isc_boolean_t answer = ISC_FALSE;
2087         isc_result_t result;
2088         rdatasetheader_t *header;
2089         unsigned int n;
2090
2091         rbtdb = search->rbtdb;
2092
2093         dns_name_init(&name, NULL);
2094         dns_name_init(&tname, NULL);
2095         dns_name_init(&rname, NULL);
2096         dns_fixedname_init(&fnext);
2097         next = dns_fixedname_name(&fnext);
2098         dns_fixedname_init(&fprev);
2099         prev = dns_fixedname_name(&fprev);
2100         dns_fixedname_init(&forigin);
2101         origin = dns_fixedname_name(&forigin);
2102
2103         /*
2104          * Find if qname is at or below a empty node.
2105          * Use our own copy of the chain.
2106          */
2107
2108         chain = search->chain;
2109         do {
2110                 node = NULL;
2111                 result = dns_rbtnodechain_current(&chain, &name,
2112                                                   origin, &node);
2113                 if (result != ISC_R_SUCCESS)
2114                         break;
2115                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2116                           isc_rwlocktype_read);
2117                 for (header = node->data;
2118                      header != NULL;
2119                      header = header->next) {
2120                         if (header->serial <= search->serial &&
2121                             !IGNORE(header) && EXISTS(header))
2122                                 break;
2123                 }
2124                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2125                             isc_rwlocktype_read);
2126                 if (header != NULL)
2127                         break;
2128                 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
2129         } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
2130         if (result == ISC_R_SUCCESS)
2131                 result = dns_name_concatenate(&name, origin, prev, NULL);
2132         if (result != ISC_R_SUCCESS)
2133                 check_prev = ISC_FALSE;
2134
2135         result = dns_rbtnodechain_next(&chain, NULL, NULL);
2136         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2137                 node = NULL;
2138                 result = dns_rbtnodechain_current(&chain, &name,
2139                                                   origin, &node);
2140                 if (result != ISC_R_SUCCESS)
2141                         break;
2142                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2143                           isc_rwlocktype_read);
2144                 for (header = node->data;
2145                      header != NULL;
2146                      header = header->next) {
2147                         if (header->serial <= search->serial &&
2148                             !IGNORE(header) && EXISTS(header))
2149                                 break;
2150                 }
2151                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2152                             isc_rwlocktype_read);
2153                 if (header != NULL)
2154                         break;
2155                 result = dns_rbtnodechain_next(&chain, NULL, NULL);
2156         }
2157         if (result == ISC_R_SUCCESS)
2158                 result = dns_name_concatenate(&name, origin, next, NULL);
2159         if (result != ISC_R_SUCCESS)
2160                 check_next = ISC_FALSE;
2161
2162         dns_name_clone(qname, &rname);
2163
2164         /*
2165          * Remove the wildcard label to find the terminal name.
2166          */
2167         n = dns_name_countlabels(wname);
2168         dns_name_getlabelsequence(wname, 1, n - 1, &tname);
2169
2170         do {
2171                 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
2172                     (check_next && dns_name_issubdomain(next, &rname))) {
2173                         answer = ISC_TRUE;
2174                         break;
2175                 }
2176                 /*
2177                  * Remove the left hand label.
2178                  */
2179                 n = dns_name_countlabels(&rname);
2180                 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
2181         } while (!dns_name_equal(&rname, &tname));
2182         return (answer);
2183 }
2184
2185 static inline isc_result_t
2186 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
2187               dns_name_t *qname)
2188 {
2189         unsigned int i, j;
2190         dns_rbtnode_t *node, *level_node, *wnode;
2191         rdatasetheader_t *header;
2192         isc_result_t result = ISC_R_NOTFOUND;
2193         dns_name_t name;
2194         dns_name_t *wname;
2195         dns_fixedname_t fwname;
2196         dns_rbtdb_t *rbtdb;
2197         isc_boolean_t done, wild, active;
2198         dns_rbtnodechain_t wchain;
2199
2200         /*
2201          * Caller must be holding the tree lock and MUST NOT be holding
2202          * any node locks.
2203          */
2204
2205         /*
2206          * Examine each ancestor level.  If the level's wild bit
2207          * is set, then construct the corresponding wildcard name and
2208          * search for it.  If the wildcard node exists, and is active in
2209          * this version, we're done.  If not, then we next check to see
2210          * if the ancestor is active in this version.  If so, then there
2211          * can be no possible wildcard match and again we're done.  If not,
2212          * continue the search.
2213          */
2214
2215         rbtdb = search->rbtdb;
2216         i = search->chain.level_matches;
2217         done = ISC_FALSE;
2218         node = *nodep;
2219         do {
2220                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2221                           isc_rwlocktype_read);
2222
2223                 /*
2224                  * First we try to figure out if this node is active in
2225                  * the search's version.  We do this now, even though we
2226                  * may not need the information, because it simplifies the
2227                  * locking and code flow.
2228                  */
2229                 for (header = node->data;
2230                      header != NULL;
2231                      header = header->next) {
2232                         if (header->serial <= search->serial &&
2233                             !IGNORE(header) && EXISTS(header))
2234                                 break;
2235                 }
2236                 if (header != NULL)
2237                         active = ISC_TRUE;
2238                 else
2239                         active = ISC_FALSE;
2240
2241                 if (node->wild)
2242                         wild = ISC_TRUE;
2243                 else
2244                         wild = ISC_FALSE;
2245
2246                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2247                             isc_rwlocktype_read);
2248
2249                 if (wild) {
2250                         /*
2251                          * Construct the wildcard name for this level.
2252                          */
2253                         dns_name_init(&name, NULL);
2254                         dns_rbt_namefromnode(node, &name);
2255                         dns_fixedname_init(&fwname);
2256                         wname = dns_fixedname_name(&fwname);
2257                         result = dns_name_concatenate(dns_wildcardname, &name,
2258                                                       wname, NULL);
2259                         j = i;
2260                         while (result == ISC_R_SUCCESS && j != 0) {
2261                                 j--;
2262                                 level_node = search->chain.levels[j];
2263                                 dns_name_init(&name, NULL);
2264                                 dns_rbt_namefromnode(level_node, &name);
2265                                 result = dns_name_concatenate(wname,
2266                                                               &name,
2267                                                               wname,
2268                                                               NULL);
2269                         }
2270                         if (result != ISC_R_SUCCESS)
2271                                 break;
2272
2273                         wnode = NULL;
2274                         dns_rbtnodechain_init(&wchain, NULL);
2275                         result = dns_rbt_findnode(rbtdb->tree, wname,
2276                                                   NULL, &wnode, &wchain,
2277                                                   DNS_RBTFIND_EMPTYDATA,
2278                                                   NULL, NULL);
2279                         if (result == ISC_R_SUCCESS) {
2280                                 nodelock_t *lock;
2281
2282                                 /*
2283                                  * We have found the wildcard node.  If it
2284                                  * is active in the search's version, we're
2285                                  * done.
2286                                  */
2287                                 lock = &rbtdb->node_locks[wnode->locknum].lock;
2288                                 NODE_LOCK(lock, isc_rwlocktype_read);
2289                                 for (header = wnode->data;
2290                                      header != NULL;
2291                                      header = header->next) {
2292                                         if (header->serial <= search->serial &&
2293                                             !IGNORE(header) && EXISTS(header))
2294                                                 break;
2295                                 }
2296                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
2297                                 if (header != NULL ||
2298                                     activeempty(search, &wchain, wname)) {
2299                                         if (activeemtpynode(search, qname,
2300                                                             wname)) {
2301                                                 return (ISC_R_NOTFOUND);
2302                                         }
2303                                         /*
2304                                          * The wildcard node is active!
2305                                          *
2306                                          * Note: result is still ISC_R_SUCCESS
2307                                          * so we don't have to set it.
2308                                          */
2309                                         *nodep = wnode;
2310                                         break;
2311                                 }
2312                         } else if (result != ISC_R_NOTFOUND &&
2313                                    result != DNS_R_PARTIALMATCH) {
2314                                 /*
2315                                  * An error has occurred.  Bail out.
2316                                  */
2317                                 break;
2318                         }
2319                 }
2320
2321                 if (active) {
2322                         /*
2323                          * The level node is active.  Any wildcarding
2324                          * present at higher levels has no
2325                          * effect and we're done.
2326                          */
2327                         result = ISC_R_NOTFOUND;
2328                         break;
2329                 }
2330
2331                 if (i > 0) {
2332                         i--;
2333                         node = search->chain.levels[i];
2334                 } else
2335                         done = ISC_TRUE;
2336         } while (!done);
2337
2338         return (result);
2339 }
2340
/*
 * Walk the search chain backwards, starting at its current position, until
 * a node is reached that carries an NSEC rdataset usable at search->serial,
 * and bind that NSEC (and its RRSIG, when present) for the caller.
 *
 * A header counts as usable when its serial is <= search->serial, it is not
 * flagged by IGNORE(), and it is not flagged by NONEXISTENT().  Nodes with
 * no usable header, and nodes with usable headers but neither NSEC nor
 * RRSIG NSEC, are skipped with dns_rbtnodechain_prev().
 *
 * Parameters:
 *   search      - search state; rbtdb, serial, now and chain are read, and
 *                 the chain position is moved backwards as nodes are
 *                 skipped.
 *   nodep       - optional; when non-NULL it receives the node found, after
 *                 new_reference() has been called on that node.
 *   foundname   - receives the concatenation of the node name and origin
 *                 reported by dns_rbtnodechain_current().
 *   rdataset    - bound to the NSEC header on success.
 *   sigrdataset - bound to the RRSIG NSEC header when one was found.
 *   need_sig    - when true, an NSEC without an RRSIG NSEC is rejected with
 *                 DNS_R_BADDB instead of being accepted.
 *
 * Returns:
 *   ISC_R_SUCCESS when an NSEC was found and foundname was built;
 *   the dns_name_concatenate() failure code when foundname could not be
 *   built (nothing is bound and *nodep is left untouched in that case);
 *   DNS_R_BADDB when a node has an RRSIG NSEC without an NSEC, has an NSEC
 *   without an RRSIG NSEC while need_sig is true, or when the chain runs
 *   out (ISC_R_NOMORE) before any NSEC is seen;
 *   any other failure from dns_rbtnodechain_current() or
 *   dns_rbtnodechain_prev() is passed through unchanged.
 *
 * NOTE(review): the calls visible in zone_find() are made after the tree
 * lock has been taken for reading and with no node lock held -- presumably
 * a precondition of this function; confirm.
 */
2341 static inline isc_result_t
2342 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
2343                   dns_name_t *foundname, dns_rdataset_t *rdataset,
2344                   dns_rdataset_t *sigrdataset, isc_boolean_t need_sig)
2345 {
2346         dns_rbtnode_t *node;
2347         rdatasetheader_t *header, *header_next, *found, *foundsig;
2348         isc_boolean_t empty_node;
2349         isc_result_t result;
2350         dns_fixedname_t fname, forigin;
2351         dns_name_t *name, *origin;
2352
             /*
              * Each pass examines the node the chain currently points at.
              * The fixed names are re-initialized every pass because
              * dns_rbtnodechain_current() fills them in afresh.
              */
2353         do {
2354                 node = NULL;
2355                 dns_fixedname_init(&fname);
2356                 name = dns_fixedname_name(&fname);
2357                 dns_fixedname_init(&forigin);
2358                 origin = dns_fixedname_name(&forigin);
2359                 result = dns_rbtnodechain_current(&search->chain, name,
2360                                                   origin, &node);
                     /*
                      * No node lock has been taken yet on this pass, so a
                      * plain return needs no cleanup.
                      */
2361                 if (result != ISC_R_SUCCESS)
2362                         return (result);
2363                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2364                           isc_rwlocktype_read);
2365                 found = NULL;
2366                 foundsig = NULL;
2367                 empty_node = ISC_TRUE;
2368                 for (header = node->data;
2369                      header != NULL;
2370                      header = header_next) {
                             /*
                              * Save the sibling link first: the inner loop
                              * below overwrites header while it follows the
                              * down chain, and may leave it NULL.
                              */
2371                         header_next = header->next;
2372                         /*
2373                          * Look for an active, extant NSEC or RRSIG NSEC.
2374                          */
2375                         do {
2376                                 if (header->serial <= search->serial &&
2377                                     !IGNORE(header)) {
2378                                         /*
2379                                          * Is this a "this rdataset doesn't
2380                                          * exist" record?
2381                                          */
2382                                         if (NONEXISTENT(header))
2383                                                 header = NULL;
2384                                         break;
2385                                 } else
2386                                         header = header->down;
2387                         } while (header != NULL);
2388                         if (header != NULL) {
2389                                 /*
2390                                  * We now know that there is at least one
2391                                  * active rdataset at this node.
2392                                  */
2393                                 empty_node = ISC_FALSE;
                                     /*
                                      * Remember NSEC and RRSIG NSEC as they
                                      * turn up, and stop scanning the node
                                      * once both have been seen.
                                      */
2394                                 if (header->type == dns_rdatatype_nsec) {
2395                                         found = header;
2396                                         if (foundsig != NULL)
2397                                                 break;
2398                                 } else if (header->type ==
2399                                            RBTDB_RDATATYPE_SIGNSEC) {
2400                                         foundsig = header;
2401                                         if (found != NULL)
2402                                                 break;
2403                                 }
2404                         }
2405                 }
2406                 if (!empty_node) {
                             /*
                              * Accept the node when the NSEC is present and
                              * either its RRSIG is present too or the caller
                              * did not ask for one.
                              */
2407                         if (found != NULL &&
2408                             (foundsig != NULL || !need_sig))
2409                         {
2410                                 /*
2411                                  * We've found the right NSEC record.
2412                                  *
2413                                  * Note: for this to really be the right
2414                                  * NSEC record, it's essential that the NSEC
2415                                  * records of any nodes obscured by a zone
2416                                  * cut have been removed; we assume this is
2417                                  * the case.
2418                                  */
2419                                 result = dns_name_concatenate(name, origin,
2420                                                               foundname, NULL);
                                     /*
                                      * On failure nothing is bound; empty_node
                                      * is already false, so the loop ends and
                                      * the failure code is returned.
                                      */
2421                                 if (result == ISC_R_SUCCESS) {
2422                                         if (nodep != NULL) {
2423                                                 new_reference(search->rbtdb,
2424                                                               node);
2425                                                 *nodep = node;
2426                                         }
2427                                         bind_rdataset(search->rbtdb, node,
2428                                                       found, search->now,
2429                                                       rdataset);
2430                                         if (foundsig != NULL)
2431                                                 bind_rdataset(search->rbtdb,
2432                                                               node,
2433                                                               foundsig,
2434                                                               search->now,
2435                                                               sigrdataset);
2436                                 }
2437                         } else if (found == NULL && foundsig == NULL) {
2438                                 /*
2439                                  * This node is active, but has no NSEC or
2440                                  * RRSIG NSEC.  That means it's glue or
2441                                  * other obscured zone data that isn't
2442                                  * relevant for our search.  Treat the
2443                                  * node as if it were empty and keep looking.
2444                                  */
2445                                 empty_node = ISC_TRUE;
                                     /*
                                      * The chain is stepped while the node
                                      * lock is still held; the local node
                                      * pointer is not passed in, so the
                                      * NODE_UNLOCK below still releases the
                                      * lock taken above.
                                      */
2446                                 result = dns_rbtnodechain_prev(&search->chain,
2447                                                                NULL, NULL);
2448                         } else {
2449                                 /*
2450                                  * We found an active node, but either the
2451                                  * NSEC or the RRSIG NSEC is missing.  This
2452                                  * shouldn't happen.
2453                                  */
2454                                 result = DNS_R_BADDB;
2455                         }
2456                 } else {
2457                         /*
2458                          * This node isn't active.  We've got to keep
2459                          * looking.
2460                          */
2461                         result = dns_rbtnodechain_prev(&search->chain, NULL,
2462                                                        NULL);
2463                 }
2464                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2465                             isc_rwlocktype_read);
                     /*
                      * Continue only while the node just examined was skipped
                      * and the step backwards succeeded.  The accepted and
                      * DNS_R_BADDB outcomes leave empty_node false and so end
                      * the loop.
                      */
2466         } while (empty_node && result == ISC_R_SUCCESS);
2467
2468         /*
2469          * If the result is ISC_R_NOMORE, then we got to the beginning of
2470          * the database and didn't find a NSEC record.  This shouldn't
2471          * happen.
2472          */
2473         if (result == ISC_R_NOMORE)
2474                 result = DNS_R_BADDB;
2475
2476         return (result);
2477 }
2478
2479 static isc_result_t
2480 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
2481           dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
2482           dns_dbnode_t **nodep, dns_name_t *foundname,
2483           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
2484 {
2485         dns_rbtnode_t *node = NULL;
2486         isc_result_t result;
2487         rbtdb_search_t search;
2488         isc_boolean_t cname_ok = ISC_TRUE;
2489         isc_boolean_t close_version = ISC_FALSE;
2490         isc_boolean_t maybe_zonecut = ISC_FALSE;
2491         isc_boolean_t at_zonecut = ISC_FALSE;
2492         isc_boolean_t wild;
2493         isc_boolean_t empty_node;
2494         rdatasetheader_t *header, *header_next, *found, *nsecheader;
2495         rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
2496         rbtdb_rdatatype_t sigtype;
2497         isc_boolean_t active;
2498         dns_rbtnodechain_t chain;
2499         nodelock_t *lock;
2500
2501
2502         search.rbtdb = (dns_rbtdb_t *)db;
2503
2504         REQUIRE(VALID_RBTDB(search.rbtdb));
2505
2506         /*
2507          * We don't care about 'now'.
2508          */
2509         UNUSED(now);
2510
2511         /*
2512          * If the caller didn't supply a version, attach to the current
2513          * version.
2514          */
2515         if (version == NULL) {
2516                 currentversion(db, &version);
2517                 close_version = ISC_TRUE;
2518         }
2519
2520         search.rbtversion = version;
2521         search.serial = search.rbtversion->serial;
2522         search.options = options;
2523         search.copy_name = ISC_FALSE;
2524         search.need_cleanup = ISC_FALSE;
2525         search.wild = ISC_FALSE;
2526         search.zonecut = NULL;
2527         dns_fixedname_init(&search.zonecut_name);
2528         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
2529         search.now = 0;
2530
2531         /*
2532          * 'wild' will be true iff. we've matched a wildcard.
2533          */
2534         wild = ISC_FALSE;
2535
2536         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
2537
2538         /*
2539          * Search down from the root of the tree.  If, while going down, we
2540          * encounter a callback node, zone_zonecut_callback() will search the
2541          * rdatasets at the zone cut for active DNAME or NS rdatasets.
2542          */
2543         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
2544                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
2545                                   zone_zonecut_callback, &search);
2546
2547         if (result == DNS_R_PARTIALMATCH) {
2548         partial_match:
2549                 if (search.zonecut != NULL) {
2550                     result = setup_delegation(&search, nodep, foundname,
2551                                               rdataset, sigrdataset);
2552                     goto tree_exit;
2553                 }
2554
2555                 if (search.wild) {
2556                         /*
2557                          * At least one of the levels in the search chain
2558                          * potentially has a wildcard.  For each such level,
2559                          * we must see if there's a matching wildcard active
2560                          * in the current version.
2561                          */
2562                         result = find_wildcard(&search, &node, name);
2563                         if (result == ISC_R_SUCCESS) {
2564                                 result = dns_name_copy(name, foundname, NULL);
2565                                 if (result != ISC_R_SUCCESS)
2566                                         goto tree_exit;
2567                                 wild = ISC_TRUE;
2568                                 goto found;
2569                         }
2570                         else if (result != ISC_R_NOTFOUND)
2571                                 goto tree_exit;
2572                 }
2573
2574                 chain = search.chain;
2575                 active = activeempty(&search, &chain, name);
2576
2577                 /*
2578                  * If we're here, then the name does not exist, is not
2579                  * beneath a zonecut, and there's no matching wildcard.
2580                  */
2581                 if (search.rbtdb->secure ||
2582                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
2583                 {
2584                         result = find_closest_nsec(&search, nodep, foundname,
2585                                                   rdataset, sigrdataset,
2586                                                   search.rbtdb->secure);
2587                         if (result == ISC_R_SUCCESS)
2588                                 result = active ? DNS_R_EMPTYNAME :
2589                                                   DNS_R_NXDOMAIN;
2590                 } else
2591                         result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
2592                 goto tree_exit;
2593         } else if (result != ISC_R_SUCCESS)
2594                 goto tree_exit;
2595
2596  found:
2597         /*
2598          * We have found a node whose name is the desired name, or we
2599          * have matched a wildcard.
2600          */
2601
2602         if (search.zonecut != NULL) {
2603                 /*
2604                  * If we're beneath a zone cut, we don't want to look for
2605                  * CNAMEs because they're not legitimate zone glue.
2606                  */
2607                 cname_ok = ISC_FALSE;
2608         } else {
2609                 /*
2610                  * The node may be a zone cut itself.  If it might be one,
2611                  * make sure we check for it later.
2612                  */
2613                 if (node->find_callback &&
2614                     (node != search.rbtdb->origin_node ||
2615                      IS_STUB(search.rbtdb)) &&
2616                     !dns_rdatatype_atparent(type))
2617                         maybe_zonecut = ISC_TRUE;
2618         }
2619
2620         /*
2621          * Certain DNSSEC types are not subject to CNAME matching
2622          * (RFC4035, section 2.5 and RFC3007).
2623          *
2624          * We don't check for RRSIG, because we don't store RRSIG records
2625          * directly.
2626          */
2627         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
2628                 cname_ok = ISC_FALSE;
2629
2630         /*
2631          * We now go looking for rdata...
2632          */
2633
2634         NODE_LOCK(&(search.rbtdb->node_locks[node->locknum].lock),
2635                   isc_rwlocktype_read);
2636
2637         found = NULL;
2638         foundsig = NULL;
2639         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
2640         nsecheader = NULL;
2641         nsecsig = NULL;
2642         cnamesig = NULL;
2643         empty_node = ISC_TRUE;
2644         for (header = node->data; header != NULL; header = header_next) {
2645                 header_next = header->next;
2646                 /*
2647                  * Look for an active, extant rdataset.
2648                  */
2649                 do {
2650                         if (header->serial <= search.serial &&
2651                             !IGNORE(header)) {
2652                                 /*
2653                                  * Is this a "this rdataset doesn't
2654                                  * exist" record?
2655                                  */
2656                                 if (NONEXISTENT(header))
2657                                         header = NULL;
2658                                 break;
2659                         } else
2660                                 header = header->down;
2661                 } while (header != NULL);
2662                 if (header != NULL) {
2663                         /*
2664                          * We now know that there is at least one active
2665                          * rdataset at this node.
2666                          */
2667                         empty_node = ISC_FALSE;
2668
2669                         /*
2670                          * Do special zone cut handling, if requested.
2671                          */
2672                         if (maybe_zonecut &&
2673                             header->type == dns_rdatatype_ns) {
2674                                 /*
2675                                  * We increment the reference count on node to
2676                                  * ensure that search->zonecut_rdataset will
2677                                  * still be valid later.
2678                                  */
2679                                 new_reference(search.rbtdb, node);
2680                                 search.zonecut = node;
2681                                 search.zonecut_rdataset = header;
2682                                 search.zonecut_sigrdataset = NULL;
2683                                 search.need_cleanup = ISC_TRUE;
2684                                 maybe_zonecut = ISC_FALSE;
2685                                 at_zonecut = ISC_TRUE;
2686                                 /*
2687                                  * It is not clear if KEY should still be
2688                                  * allowed at the parent side of the zone
2689                                  * cut or not.  It is needed for RFC3007
2690                                  * validated updates.
2691                                  */
2692                                 if ((search.options & DNS_DBFIND_GLUEOK) == 0
2693                                     && type != dns_rdatatype_nsec
2694                                     && type != dns_rdatatype_key) {
2695                                         /*
2696                                          * Glue is not OK, but any answer we
2697                                          * could return would be glue.  Return
2698                                          * the delegation.
2699                                          */
2700                                         found = NULL;
2701                                         break;
2702                                 }
2703                                 if (found != NULL && foundsig != NULL)
2704                                         break;
2705                         }
2706
2707                         /*
2708                          * If we found a type we were looking for,
2709                          * remember it.
2710                          */
2711                         if (header->type == type ||
2712                             type == dns_rdatatype_any ||
2713                             (header->type == dns_rdatatype_cname &&
2714                              cname_ok)) {
2715                                 /*
2716                                  * We've found the answer!
2717                                  */
2718                                 found = header;
2719                                 if (header->type == dns_rdatatype_cname &&
2720                                     cname_ok) {
2721                                         /*
2722                                          * We may be finding a CNAME instead
2723                                          * of the desired type.
2724                                          *
2725                                          * If we've already got the CNAME RRSIG,
2726                                          * use it, otherwise change sigtype
2727                                          * so that we find it.
2728                                          */
2729                                         if (cnamesig != NULL)
2730                                                 foundsig = cnamesig;
2731                                         else
2732                                                 sigtype =
2733                                                     RBTDB_RDATATYPE_SIGCNAME;
2734                                 }
2735                                 /*
2736                                  * If we've got all we need, end the search.
2737                                  */
2738                                 if (!maybe_zonecut && foundsig != NULL)
2739                                         break;
2740                         } else if (header->type == sigtype) {
2741                                 /*
2742                                  * We've found the RRSIG rdataset for our
2743                                  * target type.  Remember it.
2744                                  */
2745                                 foundsig = header;
2746                                 /*
2747                                  * If we've got all we need, end the search.
2748                                  */
2749                                 if (!maybe_zonecut && found != NULL)
2750                                         break;
2751                         } else if (header->type == dns_rdatatype_nsec) {
2752                                 /*
2753                                  * Remember a NSEC rdataset even if we're
2754                                  * not specifically looking for it, because
2755                                  * we might need it later.
2756                                  */
2757                                 nsecheader = header;
2758                         } else if (header->type == RBTDB_RDATATYPE_SIGNSEC) {
2759                                 /*
2760                                  * If we need the NSEC rdataset, we'll also
2761                                  * need its signature.
2762                                  */
2763                                 nsecsig = header;
2764                         } else if (cname_ok &&
2765                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
2766                                 /*
2767                                  * If we get a CNAME match, we'll also need
2768                                  * its signature.
2769                                  */
2770                                 cnamesig = header;
2771                         }
2772                 }
2773         }
2774
2775         if (empty_node) {
2776                 /*
2777                  * We have an exact match for the name, but there are no
2778                  * active rdatasets in the desired version.  That means that
2779                  * this node doesn't exist in the desired version, and that
2780                  * we really have a partial match.
2781                  */
2782                 if (!wild) {
2783                         lock = &search.rbtdb->node_locks[node->locknum].lock;
2784                         NODE_UNLOCK(lock, isc_rwlocktype_read);
2785                         goto partial_match;
2786                 }
2787         }
2788
2789         /*
2790          * If we didn't find what we were looking for...
2791          */
2792         if (found == NULL) {
2793                 if (search.zonecut != NULL) {
2794                         /*
2795                          * We were trying to find glue at a node beneath a
2796                          * zone cut, but didn't.
2797                          *
2798                          * Return the delegation.
2799                          */
2800                         lock = &search.rbtdb->node_locks[node->locknum].lock;
2801                         NODE_UNLOCK(lock, isc_rwlocktype_read);
2802                         result = setup_delegation(&search, nodep, foundname,
2803                                                   rdataset, sigrdataset);
2804                         goto tree_exit;
2805                 }
2806                 /*
2807                  * The desired type doesn't exist.
2808                  */
2809                 result = DNS_R_NXRRSET;
2810                 if (search.rbtdb->secure &&
2811                     (nsecheader == NULL || nsecsig == NULL)) {
2812                         /*
2813                          * The zone is secure but there's no NSEC,
2814                          * or the NSEC has no signature!
2815                          */
2816                         if (!wild) {
2817                                 result = DNS_R_BADDB;
2818                                 goto node_exit;
2819                         }
2820
2821                         lock = &search.rbtdb->node_locks[node->locknum].lock;
2822                         NODE_UNLOCK(lock, isc_rwlocktype_read);
2823                         result = find_closest_nsec(&search, nodep, foundname,
2824                                                    rdataset, sigrdataset,
2825                                                    search.rbtdb->secure);
2826                         if (result == ISC_R_SUCCESS)
2827                                 result = DNS_R_EMPTYWILD;
2828                         goto tree_exit;
2829                 }
2830                 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
2831                     nsecheader == NULL)
2832                 {
2833                         /*
2834                          * There's no NSEC record, and we were told
2835                          * to find one.
2836                          */
2837                         result = DNS_R_BADDB;
2838                         goto node_exit;
2839                 }
2840                 if (nodep != NULL) {
2841                         new_reference(search.rbtdb, node);
2842                         *nodep = node;
2843                 }
2844                 if (search.rbtdb->secure ||
2845                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
2846                 {
2847                         bind_rdataset(search.rbtdb, node, nsecheader,
2848                                       0, rdataset);
2849                         if (nsecsig != NULL)
2850                                 bind_rdataset(search.rbtdb, node,
2851                                               nsecsig, 0, sigrdataset);
2852                 }
2853                 if (wild)
2854                         foundname->attributes |= DNS_NAMEATTR_WILDCARD;
2855                 goto node_exit;
2856         }
2857
2858         /*
2859          * We found what we were looking for, or we found a CNAME.
2860          */
2861
2862         if (type != found->type &&
2863             type != dns_rdatatype_any &&
2864             found->type == dns_rdatatype_cname) {
2865                 /*
2866                  * We weren't doing an ANY query and we found a CNAME instead
2867                  * of the type we were looking for, so we need to indicate
2868                  * that result to the caller.
2869                  */
2870                 result = DNS_R_CNAME;
2871         } else if (search.zonecut != NULL) {
2872                 /*
2873                  * If we're beneath a zone cut, we must indicate that the
2874                  * result is glue, unless we're actually at the zone cut
2875                  * and the type is NSEC or KEY.
2876                  */
2877                 if (search.zonecut == node) {
2878                         /*
2879                          * It is not clear if KEY should still be
2880                          * allowed at the parent side of the zone
2881                          * cut or not.  It is needed for RFC3007
2882                          * validated updates.
2883                          */
2884                         if (type == dns_rdatatype_nsec ||
2885                             type == dns_rdatatype_key)
2886                                 result = ISC_R_SUCCESS;
2887                         else if (type == dns_rdatatype_any)
2888                                 result = DNS_R_ZONECUT;
2889                         else
2890                                 result = DNS_R_GLUE;
2891                 } else
2892                         result = DNS_R_GLUE;
2893                 /*
2894                  * We might have found data that isn't glue, but was occluded
2895                  * by a dynamic update.  If the caller cares about this, they
2896                  * will have told us to validate glue.
2897                  *
2898                  * XXX We should cache the glue validity state!
2899                  */
2900                 if (result == DNS_R_GLUE &&
2901                     (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
2902                     !valid_glue(&search, foundname, type, node)) {
2903                         lock = &search.rbtdb->node_locks[node->locknum].lock;
2904                         NODE_UNLOCK(lock, isc_rwlocktype_read);
2905                         result = setup_delegation(&search, nodep, foundname,
2906                                                   rdataset, sigrdataset);
2907                     goto tree_exit;
2908                 }
2909         } else {
2910                 /*
2911                  * An ordinary successful query!
2912                  */
2913                 result = ISC_R_SUCCESS;
2914         }
2915
2916         if (nodep != NULL) {
2917                 if (!at_zonecut)
2918                         new_reference(search.rbtdb, node);
2919                 else
2920                         search.need_cleanup = ISC_FALSE;
2921                 *nodep = node;
2922         }
2923
2924         if (type != dns_rdatatype_any) {
2925                 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
2926                 if (foundsig != NULL)
2927                         bind_rdataset(search.rbtdb, node, foundsig, 0,
2928                                       sigrdataset);
2929         }
2930
2931         if (wild)
2932                 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
2933
2934  node_exit:
2935         NODE_UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock),
2936                     isc_rwlocktype_read);
2937
2938  tree_exit:
2939         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
2940
2941         /*
2942          * If we found a zonecut but aren't going to use it, we have to
2943          * let go of it.
2944          */
2945         if (search.need_cleanup) {
2946                 node = search.zonecut;
2947                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
2948
2949                 NODE_LOCK(lock, isc_rwlocktype_read);
2950                 decrement_reference(search.rbtdb, node, 0,
2951                                     isc_rwlocktype_read, isc_rwlocktype_none);
2952                 NODE_UNLOCK(lock, isc_rwlocktype_read);
2953         }
2954
2955         if (close_version)
2956                 closeversion(db, &version, ISC_FALSE);
2957
2958         dns_rbtnodechain_reset(&search.chain);
2959
2960         return (result);
2961 }
2962
2963 static isc_result_t
2964 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
2965                  isc_stdtime_t now, dns_dbnode_t **nodep,
2966                  dns_name_t *foundname,
2967                  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
2968 {
2969         UNUSED(db);
2970         UNUSED(name);
2971         UNUSED(options);
2972         UNUSED(now);
2973         UNUSED(nodep);
2974         UNUSED(foundname);
2975         UNUSED(rdataset);
2976         UNUSED(sigrdataset);
2977
2978         FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
2979
2980         return (ISC_R_NOTIMPLEMENTED);
2981 }
2982
2983 static isc_result_t
2984 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2985         rbtdb_search_t *search = arg;
2986         rdatasetheader_t *header, *header_prev, *header_next;
2987         rdatasetheader_t *dname_header, *sigdname_header;
2988         isc_result_t result;
2989         nodelock_t *lock;
2990         isc_rwlocktype_t locktype;
2991
2992         /* XXX comment */
2993
2994         REQUIRE(search->zonecut == NULL);
2995
2996         /*
2997          * Keep compiler silent.
2998          */
2999         UNUSED(name);
3000
3001         lock = &(search->rbtdb->node_locks[node->locknum].lock);
3002         locktype = isc_rwlocktype_read;
3003         NODE_LOCK(lock, locktype);
3004
3005         /*
3006          * Look for a DNAME or RRSIG DNAME rdataset.
3007          */
3008         dname_header = NULL;
3009         sigdname_header = NULL;
3010         header_prev = NULL;
3011         for (header = node->data; header != NULL; header = header_next) {
3012                 header_next = header->next;
3013                 if (header->ttl <= search->now) {
3014                         /*
3015                          * This rdataset is stale.  If no one else is
3016                          * using the node, we can clean it up right
3017                          * now, otherwise we mark it as stale, and
3018                          * the node as dirty, so it will get cleaned
3019                          * up later.
3020                          */
3021                         if ((header->ttl <= search->now - RBTDB_VIRTUAL) &&
3022                             (locktype == isc_rwlocktype_write ||
3023                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3024                                 /*
3025                                  * We update the node's status only when we
3026                                  * can get write access; otherwise, we leave
3027                                  * others to this work.  Periodical cleaning
3028                                  * will eventually take the job as the last
3029                                  * resort.
3030                                  * We won't downgrade the lock, since other
3031                                  * rdatasets are probably stale, too.
3032                                  */
3033                                 locktype = isc_rwlocktype_write;
3034
3035                                 if (dns_rbtnode_refcurrent(node) == 0) {
3036                                         isc_mem_t *mctx;
3037
3038                                         /*
3039                                          * header->down can be non-NULL if the
3040                                          * refcount has just decremented to 0
3041                                          * but decrement_reference() has not
3042                                          * performed clean_cache_node(), in
3043                                          * which case we need to purge the
3044                                          * stale headers first.
3045                                          */
3046                                         mctx = search->rbtdb->common.mctx;
3047                                         clean_stale_headers(mctx, header);
3048                                         if (header_prev != NULL)
3049                                                 header_prev->next =
3050                                                         header->next;
3051                                         else
3052                                                 node->data = header->next;
3053                                         free_rdataset(mctx, header);
3054                                 } else {
3055                                         header->attributes |=
3056                                                 RDATASET_ATTR_STALE;
3057                                         node->dirty = 1;
3058                                         header_prev = header;
3059                                 }
3060                         } else
3061                                 header_prev = header;
3062                 } else if (header->type == dns_rdatatype_dname &&
3063                            EXISTS(header)) {
3064                         dname_header = header;
3065                         header_prev = header;
3066                 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
3067                          EXISTS(header)) {
3068                         sigdname_header = header;
3069                         header_prev = header;
3070                 } else
3071                         header_prev = header;
3072         }
3073
3074         if (dname_header != NULL &&
3075             (dname_header->trust != dns_trust_pending ||
3076              (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
3077                 /*
3078                  * We increment the reference count on node to ensure that
3079                  * search->zonecut_rdataset will still be valid later.
3080                  */
3081                 new_reference(search->rbtdb, node);
3082                 search->zonecut = node;
3083                 search->zonecut_rdataset = dname_header;
3084                 search->zonecut_sigrdataset = sigdname_header;
3085                 search->need_cleanup = ISC_TRUE;
3086                 result = DNS_R_PARTIALMATCH;
3087         } else
3088                 result = DNS_R_CONTINUE;
3089
3090         NODE_UNLOCK(lock, locktype);
3091
3092         return (result);
3093 }
3094
3095 static inline isc_result_t
3096 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
3097                      dns_dbnode_t **nodep, dns_name_t *foundname,
3098                      dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3099 {
3100         unsigned int i;
3101         dns_rbtnode_t *level_node;
3102         rdatasetheader_t *header, *header_prev, *header_next;
3103         rdatasetheader_t *found, *foundsig;
3104         isc_result_t result = ISC_R_NOTFOUND;
3105         dns_name_t name;
3106         dns_rbtdb_t *rbtdb;
3107         isc_boolean_t done;
3108         nodelock_t *lock;
3109         isc_rwlocktype_t locktype;
3110
3111         /*
3112          * Caller must be holding the tree lock.
3113          */
3114
3115         rbtdb = search->rbtdb;
3116         i = search->chain.level_matches;
3117         done = ISC_FALSE;
3118         do {
3119                 locktype = isc_rwlocktype_read;
3120                 lock = &rbtdb->node_locks[node->locknum].lock;
3121                 NODE_LOCK(lock, locktype);
3122
3123                 /*
3124                  * Look for NS and RRSIG NS rdatasets.
3125                  */
3126                 found = NULL;
3127                 foundsig = NULL;
3128                 header_prev = NULL;
3129                 for (header = node->data;
3130                      header != NULL;
3131                      header = header_next) {
3132                         header_next = header->next;
3133                         if (header->ttl <= search->now) {
3134                                 /*
3135                                  * This rdataset is stale.  If no one else is
3136                                  * using the node, we can clean it up right
3137                                  * now, otherwise we mark it as stale, and
3138                                  * the node as dirty, so it will get cleaned
3139                                  * up later.
3140                                  */
3141                                 if ((header->ttl <= search->now -
3142                                                     RBTDB_VIRTUAL) &&
3143                                     (locktype == isc_rwlocktype_write ||
3144                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3145                                         /*
3146                                          * We update the node's status only
3147                                          * when we can get write access.
3148                                          */
3149                                         locktype = isc_rwlocktype_write;
3150
3151                                         if (dns_rbtnode_refcurrent(node)
3152                                             == 0) {
3153                                                 isc_mem_t *m;
3154
3155                                                 m = search->rbtdb->common.mctx;
3156                                                 clean_stale_headers(m, header);
3157                                                 if (header_prev != NULL)
3158                                                         header_prev->next =
3159                                                                 header->next;
3160                                                 else
3161                                                         node->data =
3162                                                                 header->next;
3163                                                 free_rdataset(m, header);
3164                                         } else {
3165                                                 header->attributes |=
3166                                                         RDATASET_ATTR_STALE;
3167                                                 node->dirty = 1;
3168                                                 header_prev = header;
3169                                         }
3170                                 } else
3171                                         header_prev = header;
3172                         } else if (EXISTS(header)) {
3173                                 /*
3174                                  * We've found an extant rdataset.  See if
3175                                  * we're interested in it.
3176                                  */
3177                                 if (header->type == dns_rdatatype_ns) {
3178                                         found = header;
3179                                         if (foundsig != NULL)
3180                                                 break;
3181                                 } else if (header->type ==
3182                                            RBTDB_RDATATYPE_SIGNS) {
3183                                         foundsig = header;
3184                                         if (found != NULL)
3185                                                 break;
3186                                 }
3187                                 header_prev = header;
3188                         } else
3189                                 header_prev = header;
3190                 }
3191
3192                 if (found != NULL) {
3193                         /*
3194                          * If we have to set foundname, we do it before
3195                          * anything else.  If we were to set foundname after
3196                          * we had set nodep or bound the rdataset, then we'd
3197                          * have to undo that work if dns_name_concatenate()
3198                          * failed.  By setting foundname first, there's
3199                          * nothing to undo if we have trouble.
3200                          */
3201                         if (foundname != NULL) {
3202                                 dns_name_init(&name, NULL);
3203                                 dns_rbt_namefromnode(node, &name);
3204                                 result = dns_name_copy(&name, foundname, NULL);
3205                                 while (result == ISC_R_SUCCESS && i > 0) {
3206                                         i--;
3207                                         level_node = search->chain.levels[i];
3208                                         dns_name_init(&name, NULL);
3209                                         dns_rbt_namefromnode(level_node,
3210                                                              &name);
3211                                         result =
3212                                                 dns_name_concatenate(foundname,
3213                                                                      &name,
3214                                                                      foundname,
3215                                                                      NULL);
3216                                 }
3217                                 if (result != ISC_R_SUCCESS) {
3218                                         *nodep = NULL;
3219                                         goto node_exit;
3220                                 }
3221                         }
3222                         result = DNS_R_DELEGATION;
3223                         if (nodep != NULL) {
3224                                 new_reference(search->rbtdb, node);
3225                                 *nodep = node;
3226                         }
3227                         bind_rdataset(search->rbtdb, node, found, search->now,
3228                                       rdataset);
3229                         if (foundsig != NULL)
3230                                 bind_rdataset(search->rbtdb, node, foundsig,
3231                                               search->now, sigrdataset);
3232                 }
3233
3234         node_exit:
3235                 NODE_UNLOCK(lock, locktype);
3236
3237                 if (found == NULL && i > 0) {
3238                         i--;
3239                         node = search->chain.levels[i];
3240                 } else
3241                         done = ISC_TRUE;
3242
3243         } while (!done);
3244
3245         return (result);
3246 }
3247
3248 static isc_result_t
3249 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3250                   isc_stdtime_t now, dns_name_t *foundname,
3251                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3252 {
3253         dns_rbtnode_t *node;
3254         rdatasetheader_t *header, *header_next, *header_prev;
3255         rdatasetheader_t *found, *foundsig;
3256         isc_boolean_t empty_node;
3257         isc_result_t result;
3258         dns_fixedname_t fname, forigin;
3259         dns_name_t *name, *origin;
3260         rbtdb_rdatatype_t matchtype, sigmatchtype;
3261         nodelock_t *lock;
3262         isc_rwlocktype_t locktype;
3263
3264         matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
3265         sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
3266                                              dns_rdatatype_nsec);
3267
3268         do {
3269                 node = NULL;
3270                 dns_fixedname_init(&fname);
3271                 name = dns_fixedname_name(&fname);
3272                 dns_fixedname_init(&forigin);
3273                 origin = dns_fixedname_name(&forigin);
3274                 result = dns_rbtnodechain_current(&search->chain, name,
3275                                                   origin, &node);
3276                 if (result != ISC_R_SUCCESS)
3277                         return (result);
3278                 locktype = isc_rwlocktype_read;
3279                 lock = &(search->rbtdb->node_locks[node->locknum].lock);
3280                 NODE_LOCK(lock, locktype);
3281                 found = NULL;
3282                 foundsig = NULL;
3283                 empty_node = ISC_TRUE;
3284                 header_prev = NULL;
3285                 for (header = node->data;
3286                      header != NULL;
3287                      header = header_next) {
3288                         header_next = header->next;
3289                         if (header->ttl <= now) {
3290                                 /*
3291                                  * This rdataset is stale.  If no one else is
3292                                  * using the node, we can clean it up right
3293                                  * now, otherwise we mark it as stale, and the
3294                                  * node as dirty, so it will get cleaned up
3295                                  * later.
3296                                  */
3297                                 if ((header->ttl <= now - RBTDB_VIRTUAL) &&
3298                                     (locktype == isc_rwlocktype_write ||
3299                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3300                                         /*
3301                                          * We update the node's status only
3302                                          * when we can get write access.
3303                                          */
3304                                         locktype = isc_rwlocktype_write;
3305
3306                                         if (dns_rbtnode_refcurrent(node)
3307                                             == 0) {
3308                                                 isc_mem_t *m;
3309
3310                                                 m = search->rbtdb->common.mctx;
3311                                                 clean_stale_headers(m, header);
3312                                                 if (header_prev != NULL)
3313                                                         header_prev->next =
3314                                                                 header->next;
3315                                                 else
3316                                                         node->data = header->next;
3317                                                 free_rdataset(m, header);
3318                                         } else {
3319                                                 header->attributes |=
3320                                                         RDATASET_ATTR_STALE;
3321                                                 node->dirty = 1;
3322                                                 header_prev = header;
3323                                         }
3324                                 } else
3325                                         header_prev = header;
3326                                 continue;
3327                         }
3328                         if (NONEXISTENT(header) ||
3329                             RBTDB_RDATATYPE_BASE(header->type) == 0) {
3330                                 header_prev = header;
3331                                 continue;
3332                         }
3333                         empty_node = ISC_FALSE;
3334                         if (header->type == matchtype)
3335                                 found = header;
3336                         else if (header->type == sigmatchtype)
3337                                 foundsig = header;
3338                         header_prev = header;
3339                 }
3340                 if (found != NULL) {
3341                         result = dns_name_concatenate(name, origin,
3342                                                       foundname, NULL);
3343                         if (result != ISC_R_SUCCESS)
3344                                 goto unlock_node;
3345                         bind_rdataset(search->rbtdb, node, found,
3346                                       now, rdataset);
3347                         if (foundsig != NULL)
3348                                 bind_rdataset(search->rbtdb, node, foundsig,
3349                                               now, sigrdataset);
3350                         new_reference(search->rbtdb, node);
3351                         *nodep = node;
3352                         result = DNS_R_COVERINGNSEC;
3353                 } else if (!empty_node) {
3354                         result = ISC_R_NOTFOUND;
3355                 } else
3356                         result = dns_rbtnodechain_prev(&search->chain, NULL,
3357                                                        NULL);
3358  unlock_node:
3359                 NODE_UNLOCK(lock, locktype);
3360         } while (empty_node && result == ISC_R_SUCCESS);
3361         return (result);
3362 }
3363
3364 static isc_result_t
3365 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3366            dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3367            dns_dbnode_t **nodep, dns_name_t *foundname,
3368            dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3369 {
3370         dns_rbtnode_t *node = NULL;
3371         isc_result_t result;
3372         rbtdb_search_t search;
3373         isc_boolean_t cname_ok = ISC_TRUE;
3374         isc_boolean_t empty_node;
3375         nodelock_t *lock;
3376         isc_rwlocktype_t locktype;
3377         rdatasetheader_t *header, *header_prev, *header_next;
3378         rdatasetheader_t *found, *nsheader;
3379         rdatasetheader_t *foundsig, *nssig, *cnamesig;
3380         rbtdb_rdatatype_t sigtype, negtype;
3381
3382         UNUSED(version);
3383
3384         search.rbtdb = (dns_rbtdb_t *)db;
3385
3386         REQUIRE(VALID_RBTDB(search.rbtdb));
3387         REQUIRE(version == NULL);
3388
3389         if (now == 0)
3390                 isc_stdtime_get(&now);
3391
3392         search.rbtversion = NULL;
3393         search.serial = 1;
3394         search.options = options;
3395         search.copy_name = ISC_FALSE;
3396         search.need_cleanup = ISC_FALSE;
3397         search.wild = ISC_FALSE;
3398         search.zonecut = NULL;
3399         dns_fixedname_init(&search.zonecut_name);
3400         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3401         search.now = now;
3402
3403         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3404
3405         /*
3406          * Search down from the root of the tree.  If, while going down, we
3407          * encounter a callback node, cache_zonecut_callback() will search the
3408          * rdatasets at the zone cut for a DNAME rdataset.
3409          */
3410         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
3411                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
3412                                   cache_zonecut_callback, &search);
3413
3414         if (result == DNS_R_PARTIALMATCH) {
3415                 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
3416                         result = find_coveringnsec(&search, nodep, now,
3417                                                    foundname, rdataset,
3418                                                    sigrdataset);
3419                         if (result == DNS_R_COVERINGNSEC)
3420                                 goto tree_exit;
3421                 }
3422                 if (search.zonecut != NULL) {
3423                     result = setup_delegation(&search, nodep, foundname,
3424                                               rdataset, sigrdataset);
3425                     goto tree_exit;
3426                 } else {
3427                 find_ns:
3428                         result = find_deepest_zonecut(&search, node, nodep,
3429                                                       foundname, rdataset,
3430                                                       sigrdataset);
3431                         goto tree_exit;
3432                 }
3433         } else if (result != ISC_R_SUCCESS)
3434                 goto tree_exit;
3435
3436         /*
3437          * Certain DNSSEC types are not subject to CNAME matching
3438          * (RFC4035, section 2.5 and RFC3007).
3439          *
3440          * We don't check for RRSIG, because we don't store RRSIG records
3441          * directly.
3442          */
3443         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3444                 cname_ok = ISC_FALSE;
3445
3446         /*
3447          * We now go looking for rdata...
3448          */
3449
3450         lock = &(search.rbtdb->node_locks[node->locknum].lock);
3451         locktype = isc_rwlocktype_read;
3452         NODE_LOCK(lock, locktype);
3453
3454         found = NULL;
3455         foundsig = NULL;
3456         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3457         negtype = RBTDB_RDATATYPE_VALUE(0, type);
3458         nsheader = NULL;
3459         nssig = NULL;
3460         cnamesig = NULL;
3461         empty_node = ISC_TRUE;
3462         header_prev = NULL;
3463         for (header = node->data; header != NULL; header = header_next) {
3464                 header_next = header->next;
3465                 if (header->ttl <= now) {
3466                         /*
3467                          * This rdataset is stale.  If no one else is using the
3468                          * node, we can clean it up right now, otherwise we
3469                          * mark it as stale, and the node as dirty, so it will
3470                          * get cleaned up later.
3471                          */
3472                         if ((header->ttl <= now - RBTDB_VIRTUAL) &&
3473                             (locktype == isc_rwlocktype_write ||
3474                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3475                                 /*
3476                                  * We update the node's status only when we
3477                                  * can get write access.
3478                                  */
3479                                 locktype = isc_rwlocktype_write;
3480
3481                                 if (dns_rbtnode_refcurrent(node) == 0) {
3482                                         isc_mem_t *mctx;
3483
3484                                         mctx = search.rbtdb->common.mctx;
3485                                         clean_stale_headers(mctx, header);
3486                                         if (header_prev != NULL)
3487                                                 header_prev->next =
3488                                                         header->next;
3489                                         else
3490                                                 node->data = header->next;
3491                                         free_rdataset(mctx, header);
3492                                 } else {
3493                                         header->attributes |=
3494                                                 RDATASET_ATTR_STALE;
3495                                         node->dirty = 1;
3496                                         header_prev = header;
3497                                 }
3498                         } else
3499                                 header_prev = header;
3500                 } else if (EXISTS(header)) {
3501                         /*
3502                          * We now know that there is at least one active
3503                          * non-stale rdataset at this node.
3504                          */
3505                         empty_node = ISC_FALSE;
3506
3507                         /*
3508                          * If we found a type we were looking for, remember
3509                          * it.
3510                          */
3511                         if (header->type == type ||
3512                             (type == dns_rdatatype_any &&
3513                              RBTDB_RDATATYPE_BASE(header->type) != 0) ||
3514                             (cname_ok && header->type ==
3515                              dns_rdatatype_cname)) {
3516                                 /*
3517                                  * We've found the answer.
3518                                  */
3519                                 found = header;
3520                                 if (header->type == dns_rdatatype_cname &&
3521                                     cname_ok &&
3522                                     cnamesig != NULL) {
3523                                         /*
3524                                          * If we've already got the CNAME RRSIG,
3525                                          * use it, otherwise change sigtype
3526                                          * so that we find it.
3527                                          */
3528                                         if (cnamesig != NULL)
3529                                                 foundsig = cnamesig;
3530                                         else
3531                                                 sigtype =
3532                                                     RBTDB_RDATATYPE_SIGCNAME;
3533                                         foundsig = cnamesig;
3534                                 }
3535                         } else if (header->type == sigtype) {
3536                                 /*
3537                                  * We've found the RRSIG rdataset for our
3538                                  * target type.  Remember it.
3539                                  */
3540                                 foundsig = header;
3541                         } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
3542                                    header->type == negtype) {
3543                                 /*
3544                                  * We've found a negative cache entry.
3545                                  */
3546                                 found = header;
3547                         } else if (header->type == dns_rdatatype_ns) {
3548                                 /*
3549                                  * Remember a NS rdataset even if we're
3550                                  * not specifically looking for it, because
3551                                  * we might need it later.
3552                                  */
3553                                 nsheader = header;
3554                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
3555                                 /*
3556                                  * If we need the NS rdataset, we'll also
3557                                  * need its signature.
3558                                  */
3559                                 nssig = header;
3560                         } else if (cname_ok &&
3561                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
3562                                 /*
3563                                  * If we get a CNAME match, we'll also need
3564                                  * its signature.
3565                                  */
3566                                 cnamesig = header;
3567                         }
3568                         header_prev = header;
3569                 } else
3570                         header_prev = header;
3571         }
3572
3573         if (empty_node) {
3574                 /*
3575                  * We have an exact match for the name, but there are no
3576                  * extant rdatasets.  That means that this node doesn't
3577                  * meaningfully exist, and that we really have a partial match.
3578                  */
3579                 NODE_UNLOCK(lock, locktype);
3580                 goto find_ns;
3581         }
3582
3583         /*
3584          * If we didn't find what we were looking for...
3585          */
3586         if (found == NULL ||
3587             (found->trust == dns_trust_glue &&
3588              ((options & DNS_DBFIND_GLUEOK) == 0)) ||
3589             (found->trust == dns_trust_pending &&
3590              ((options & DNS_DBFIND_PENDINGOK) == 0))) {
3591                 /*
3592                  * If there is an NS rdataset at this node, then this is the
3593                  * deepest zone cut.
3594                  */
3595                 if (nsheader != NULL) {
3596                         if (nodep != NULL) {
3597                                 new_reference(search.rbtdb, node);
3598                                 *nodep = node;
3599                         }
3600                         bind_rdataset(search.rbtdb, node, nsheader, search.now,
3601                                       rdataset);
3602                         if (nssig != NULL)
3603                                 bind_rdataset(search.rbtdb, node, nssig,
3604                                               search.now, sigrdataset);
3605                         result = DNS_R_DELEGATION;
3606                         goto node_exit;
3607                 }
3608
3609                 /*
3610                  * Go find the deepest zone cut.
3611                  */
3612                 NODE_UNLOCK(lock, locktype);
3613                 goto find_ns;
3614         }
3615
3616         /*
3617          * We found what we were looking for, or we found a CNAME.
3618          */
3619
3620         if (nodep != NULL) {
3621                 new_reference(search.rbtdb, node);
3622                 *nodep = node;
3623         }
3624
3625         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
3626                 /*
3627                  * We found a negative cache entry.
3628                  */
3629                 if (NXDOMAIN(found))
3630                         result = DNS_R_NCACHENXDOMAIN;
3631                 else
3632                         result = DNS_R_NCACHENXRRSET;
3633         } else if (type != found->type &&
3634                    type != dns_rdatatype_any &&
3635                    found->type == dns_rdatatype_cname) {
3636                 /*
3637                  * We weren't doing an ANY query and we found a CNAME instead
3638                  * of the type we were looking for, so we need to indicate
3639                  * that result to the caller.
3640                  */
3641                 result = DNS_R_CNAME;
3642         } else {
3643                 /*
3644                  * An ordinary successful query!
3645                  */
3646                 result = ISC_R_SUCCESS;
3647         }
3648
3649         if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
3650             result == DNS_R_NCACHENXRRSET) {
3651                 bind_rdataset(search.rbtdb, node, found, search.now,
3652                               rdataset);
3653                 if (foundsig != NULL)
3654                         bind_rdataset(search.rbtdb, node, foundsig, search.now,
3655                                       sigrdataset);
3656         }
3657
3658  node_exit:
3659         NODE_UNLOCK(lock, locktype);
3660
3661  tree_exit:
3662         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3663
3664         /*
3665          * If we found a zonecut but aren't going to use it, we have to
3666          * let go of it.
3667          */
3668         if (search.need_cleanup) {
3669                 node = search.zonecut;
3670                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3671
3672                 NODE_LOCK(lock, isc_rwlocktype_read);
3673                 decrement_reference(search.rbtdb, node, 0,
3674                                     isc_rwlocktype_read, isc_rwlocktype_none);
3675                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3676         }
3677
3678         dns_rbtnodechain_reset(&search.chain);
3679
3680         return (result);
3681 }
3682
3683 static isc_result_t
3684 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
3685                   isc_stdtime_t now, dns_dbnode_t **nodep,
3686                   dns_name_t *foundname,
3687                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3688 {
3689         dns_rbtnode_t *node = NULL;
3690         nodelock_t *lock;
3691         isc_result_t result;
3692         rbtdb_search_t search;
3693         rdatasetheader_t *header, *header_prev, *header_next;
3694         rdatasetheader_t *found, *foundsig;
3695         unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
3696         isc_rwlocktype_t locktype;
3697
3698         search.rbtdb = (dns_rbtdb_t *)db;
3699
3700         REQUIRE(VALID_RBTDB(search.rbtdb));
3701
3702         if (now == 0)
3703                 isc_stdtime_get(&now);
3704
3705         search.rbtversion = NULL;
3706         search.serial = 1;
3707         search.options = options;
3708         search.copy_name = ISC_FALSE;
3709         search.need_cleanup = ISC_FALSE;
3710         search.wild = ISC_FALSE;
3711         search.zonecut = NULL;
3712         dns_fixedname_init(&search.zonecut_name);
3713         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3714         search.now = now;
3715
3716         if ((options & DNS_DBFIND_NOEXACT) != 0)
3717                 rbtoptions |= DNS_RBTFIND_NOEXACT;
3718
3719         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3720
3721         /*
3722          * Search down from the root of the tree.
3723          */
3724         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
3725                                   &search.chain, rbtoptions, NULL, &search);
3726
3727         if (result == DNS_R_PARTIALMATCH) {
3728         find_ns:
3729                 result = find_deepest_zonecut(&search, node, nodep, foundname,
3730                                               rdataset, sigrdataset);
3731                 goto tree_exit;
3732         } else if (result != ISC_R_SUCCESS)
3733                 goto tree_exit;
3734
3735         /*
3736          * We now go looking for an NS rdataset at the node.
3737          */
3738
3739         lock = &(search.rbtdb->node_locks[node->locknum].lock);
3740         locktype = isc_rwlocktype_read;
3741         NODE_LOCK(lock, locktype);
3742
3743         found = NULL;
3744         foundsig = NULL;
3745         header_prev = NULL;
3746         for (header = node->data; header != NULL; header = header_next) {
3747                 header_next = header->next;
3748                 if (header->ttl <= now) {
3749                         /*
3750                          * This rdataset is stale.  If no one else is using the
3751                          * node, we can clean it up right now, otherwise we
3752                          * mark it as stale, and the node as dirty, so it will
3753                          * get cleaned up later.
3754                          */
3755                         if ((header->ttl <= now - RBTDB_VIRTUAL) &&
3756                             (locktype == isc_rwlocktype_write ||
3757                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3758                                 /*
3759                                  * We update the node's status only when we
3760                                  * can get write access.
3761                                  */
3762                                 locktype = isc_rwlocktype_write;
3763
3764                                 if (dns_rbtnode_refcurrent(node) == 0) {
3765                                         isc_mem_t *mctx;
3766
3767                                         mctx = search.rbtdb->common.mctx;
3768                                         clean_stale_headers(mctx, header);
3769                                         if (header_prev != NULL)
3770                                                 header_prev->next =
3771                                                         header->next;
3772                                         else
3773                                                 node->data = header->next;
3774                                         free_rdataset(mctx, header);
3775                                 } else {
3776                                         header->attributes |=
3777                                                 RDATASET_ATTR_STALE;
3778                                         node->dirty = 1;
3779                                         header_prev = header;
3780                                 }
3781                         } else
3782                                 header_prev = header;
3783                 } else if (EXISTS(header)) {
3784                         /*
3785                          * If we found a type we were looking for, remember
3786                          * it.
3787                          */
3788                         if (header->type == dns_rdatatype_ns) {
3789                                 /*
3790                                  * Remember a NS rdataset even if we're
3791                                  * not specifically looking for it, because
3792                                  * we might need it later.
3793                                  */
3794                                 found = header;
3795                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
3796                                 /*
3797                                  * If we need the NS rdataset, we'll also
3798                                  * need its signature.
3799                                  */
3800                                 foundsig = header;
3801                         }
3802                         header_prev = header;
3803                 } else
3804                         header_prev = header;
3805         }
3806
3807         if (found == NULL) {
3808                 /*
3809                  * No NS records here.
3810                  */
3811                 NODE_UNLOCK(lock, locktype);
3812                 goto find_ns;
3813         }
3814
3815         if (nodep != NULL) {
3816                 new_reference(search.rbtdb, node);
3817                 *nodep = node;
3818         }
3819
3820         bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
3821         if (foundsig != NULL)
3822                 bind_rdataset(search.rbtdb, node, foundsig, search.now,
3823                               sigrdataset);
3824
3825         NODE_UNLOCK(lock, locktype);
3826
3827  tree_exit:
3828         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3829
3830         INSIST(!search.need_cleanup);
3831
3832         dns_rbtnodechain_reset(&search.chain);
3833
3834         if (result == DNS_R_DELEGATION)
3835                 result = ISC_R_SUCCESS;
3836
3837         return (result);
3838 }
3839
3840 static void
3841 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
3842         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3843         dns_rbtnode_t *node = (dns_rbtnode_t *)source;
3844         unsigned int refs;
3845
3846         REQUIRE(VALID_RBTDB(rbtdb));
3847         REQUIRE(targetp != NULL && *targetp == NULL);
3848
3849         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
3850         dns_rbtnode_refincrement(node, &refs);
3851         INSIST(refs != 0);
3852         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
3853
3854         *targetp = source;
3855 }
3856
3857 static void
3858 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
3859         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3860         dns_rbtnode_t *node;
3861         isc_boolean_t want_free = ISC_FALSE;
3862         isc_boolean_t inactive = ISC_FALSE;
3863         rbtdb_nodelock_t *nodelock;
3864
3865         REQUIRE(VALID_RBTDB(rbtdb));
3866         REQUIRE(targetp != NULL && *targetp != NULL);
3867
3868         node = (dns_rbtnode_t *)(*targetp);
3869         nodelock = &rbtdb->node_locks[node->locknum];
3870
3871         NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
3872
3873         if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
3874                                 isc_rwlocktype_none)) {
3875                 if (isc_refcount_current(&nodelock->references) == 0 &&
3876                     nodelock->exiting) {
3877                         inactive = ISC_TRUE;
3878                 }
3879         }
3880
3881         NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
3882
3883         *targetp = NULL;
3884
3885         if (inactive) {
3886                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
3887                 rbtdb->active--;
3888                 if (rbtdb->active == 0)
3889                         want_free = ISC_TRUE;
3890                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
3891                 if (want_free) {
3892                         char buf[DNS_NAME_FORMATSIZE];
3893                         if (dns_name_dynamic(&rbtdb->common.origin))
3894                                 dns_name_format(&rbtdb->common.origin, buf,
3895                                                 sizeof(buf));
3896                         else
3897                                 strcpy(buf, "<UNKNOWN>");
3898                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
3899                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
3900                                       "calling free_rbtdb(%s)", buf);
3901                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
3902                 }
3903         }
3904 }
3905
3906 static isc_result_t
3907 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
3908         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3909         dns_rbtnode_t *rbtnode = node;
3910         rdatasetheader_t *header;
3911         isc_boolean_t force_expire = ISC_FALSE;
3912         /*
3913          * These are the category and module used by the cache cleaner.
3914          */
3915         isc_boolean_t log = ISC_FALSE;
3916         isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
3917         isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
3918         int level = ISC_LOG_DEBUG(2);
3919         char printname[DNS_NAME_FORMATSIZE];
3920
3921         REQUIRE(VALID_RBTDB(rbtdb));
3922
3923         /*
3924          * Caller must hold a tree lock.
3925          */
3926
3927         if (now == 0)
3928                 isc_stdtime_get(&now);
3929
3930         if (rbtdb->overmem) {
3931                 isc_uint32_t val;
3932
3933                 isc_random_get(&val);
3934                 /*
3935                  * XXXDCL Could stand to have a better policy, like LRU.
3936                  */
3937                 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
3938
3939                 /*
3940                  * Note that 'log' can be true IFF rbtdb->overmem is also true.
3941                  * rbtdb->ovemem can currently only be true for cache databases
3942                  * -- hence all of the "overmem cache" log strings.
3943                  */
3944                 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
3945                 if (log)
3946                         isc_log_write(dns_lctx, category, module, level,
3947                                       "overmem cache: %s %s",
3948                                       force_expire ? "FORCE" : "check",
3949                                       dns_rbt_formatnodename(rbtnode,
3950                                                            printname,
3951                                                            sizeof(printname)));
3952         }
3953
3954         /*
3955          * We may not need write access, but this code path is not performance
3956          * sensitive, so it should be okay to always lock as a writer.
3957          */
3958         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
3959                   isc_rwlocktype_write);
3960
3961         for (header = rbtnode->data; header != NULL; header = header->next)
3962                 if (header->ttl <= now - RBTDB_VIRTUAL) {
3963                         /*
3964                          * We don't check if refcurrent(rbtnode) == 0 and try
3965                          * to free like we do in cache_find(), because
3966                          * refcurrent(rbtnode) must be non-zero.  This is so
3967                          * because 'node' is an argument to the function.
3968                          */
3969                         header->attributes |= RDATASET_ATTR_STALE;
3970                         rbtnode->dirty = 1;
3971                         if (log)
3972                                 isc_log_write(dns_lctx, category, module,
3973                                               level, "overmem cache: stale %s",
3974                                               printname);
3975                 } else if (force_expire) {
3976                         if (! RETAIN(header)) {
3977                                 header->ttl = 0;
3978                                 header->attributes |= RDATASET_ATTR_STALE;
3979                                 rbtnode->dirty = 1;
3980                         } else if (log) {
3981                                 isc_log_write(dns_lctx, category, module,
3982                                               level, "overmem cache: "
3983                                               "reprieve by RETAIN() %s",
3984                                               printname);
3985                         }
3986                 } else if (rbtdb->overmem && log)
3987                         isc_log_write(dns_lctx, category, module, level,
3988                                       "overmem cache: saved %s", printname);
3989
3990         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
3991                     isc_rwlocktype_write);
3992
3993         return (ISC_R_SUCCESS);
3994 }
3995
3996 static void
3997 overmem(dns_db_t *db, isc_boolean_t overmem) {
3998         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3999
4000         if (IS_CACHE(rbtdb)) {
4001                 rbtdb->overmem = overmem;
4002         }
4003 }
4004
4005 static void
4006 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
4007         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4008         dns_rbtnode_t *rbtnode = node;
4009         isc_boolean_t first;
4010
4011         REQUIRE(VALID_RBTDB(rbtdb));
4012
4013         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4014                   isc_rwlocktype_read);
4015
4016         fprintf(out, "node %p, %u references, locknum = %u\n",
4017                 rbtnode, dns_rbtnode_refcurrent(rbtnode),
4018                 rbtnode->locknum);
4019         if (rbtnode->data != NULL) {
4020                 rdatasetheader_t *current, *top_next;
4021
4022                 for (current = rbtnode->data; current != NULL;
4023                      current = top_next) {
4024                         top_next = current->next;
4025                         first = ISC_TRUE;
4026                         fprintf(out, "\ttype %u", current->type);
4027                         do {
4028                                 if (!first)
4029                                         fprintf(out, "\t");
4030                                 first = ISC_FALSE;
4031                                 fprintf(out,
4032                                         "\tserial = %lu, ttl = %u, "
4033                                         "trust = %u, attributes = %u\n",
4034                                         (unsigned long)current->serial,
4035                                         current->ttl,
4036                                         current->trust,
4037                                         current->attributes);
4038                                 current = current->down;
4039                         } while (current != NULL);
4040                 }
4041         } else
4042                 fprintf(out, "(empty)\n");
4043
4044         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4045                     isc_rwlocktype_read);
4046 }
4047
4048 static isc_result_t
4049 createiterator(dns_db_t *db, isc_boolean_t relative_names,
4050                dns_dbiterator_t **iteratorp)
4051 {
4052         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4053         rbtdb_dbiterator_t *rbtdbiter;
4054
4055         REQUIRE(VALID_RBTDB(rbtdb));
4056
4057         rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
4058         if (rbtdbiter == NULL)
4059                 return (ISC_R_NOMEMORY);
4060
4061         rbtdbiter->common.methods = &dbiterator_methods;
4062         rbtdbiter->common.db = NULL;
4063         dns_db_attach(db, &rbtdbiter->common.db);
4064         rbtdbiter->common.relative_names = relative_names;
4065         rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
4066         rbtdbiter->common.cleaning = ISC_FALSE;
4067         rbtdbiter->paused = ISC_TRUE;
4068         rbtdbiter->tree_locked = isc_rwlocktype_none;
4069         rbtdbiter->result = ISC_R_SUCCESS;
4070         dns_fixedname_init(&rbtdbiter->name);
4071         dns_fixedname_init(&rbtdbiter->origin);
4072         rbtdbiter->node = NULL;
4073         rbtdbiter->delete = 0;
4074         memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
4075         dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
4076
4077         *iteratorp = (dns_dbiterator_t *)rbtdbiter;
4078
4079         return (ISC_R_SUCCESS);
4080 }
4081
4082 static isc_result_t
4083 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
4084                   dns_rdatatype_t type, dns_rdatatype_t covers,
4085                   isc_stdtime_t now, dns_rdataset_t *rdataset,
4086                   dns_rdataset_t *sigrdataset)
4087 {
4088         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4089         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
4090         rdatasetheader_t *header, *header_next, *found, *foundsig;
4091         rbtdb_serial_t serial;
4092         rbtdb_version_t *rbtversion = version;
4093         isc_boolean_t close_version = ISC_FALSE;
4094         rbtdb_rdatatype_t matchtype, sigmatchtype;
4095
4096         REQUIRE(VALID_RBTDB(rbtdb));
4097         REQUIRE(type != dns_rdatatype_any);
4098
4099         if (rbtversion == NULL) {
4100                 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
4101                 close_version = ISC_TRUE;
4102         }
4103         serial = rbtversion->serial;
4104         now = 0;
4105
4106         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4107                   isc_rwlocktype_read);
4108
4109         found = NULL;
4110         foundsig = NULL;
4111         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
4112         if (covers == 0)
4113                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4114         else
4115                 sigmatchtype = 0;
4116
4117         for (header = rbtnode->data; header != NULL; header = header_next) {
4118                 header_next = header->next;
4119                 do {
4120                         if (header->serial <= serial &&
4121                             !IGNORE(header)) {
4122                                 /*
4123                                  * Is this a "this rdataset doesn't
4124                                  * exist" record?
4125                                  */
4126                                 if (NONEXISTENT(header))
4127                                         header = NULL;
4128                                 break;
4129                         } else
4130                                 header = header->down;
4131                 } while (header != NULL);
4132                 if (header != NULL) {
4133                         /*
4134                          * We have an active, extant rdataset.  If it's a
4135                          * type we're looking for, remember it.
4136                          */
4137                         if (header->type == matchtype) {
4138                                 found = header;
4139                                 if (foundsig != NULL)
4140                                         break;
4141                         } else if (header->type == sigmatchtype) {
4142                                 foundsig = header;
4143                                 if (found != NULL)
4144                                         break;
4145                         }
4146                 }
4147         }
4148         if (found != NULL) {
4149                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
4150                 if (foundsig != NULL)
4151                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
4152                                       sigrdataset);
4153         }
4154
4155         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4156                     isc_rwlocktype_read);
4157
4158         if (close_version)
4159                 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
4160                              ISC_FALSE);
4161
4162         if (found == NULL)
4163                 return (ISC_R_NOTFOUND);
4164
4165         return (ISC_R_SUCCESS);
4166 }
4167
4168 static isc_result_t
4169 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
4170                    dns_rdatatype_t type, dns_rdatatype_t covers,
4171                    isc_stdtime_t now, dns_rdataset_t *rdataset,
4172                    dns_rdataset_t *sigrdataset)
4173 {
4174         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4175         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
4176         rdatasetheader_t *header, *header_next, *found, *foundsig;
4177         rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
4178         isc_result_t result;
4179         nodelock_t *lock;
4180         isc_rwlocktype_t locktype;
4181
4182         REQUIRE(VALID_RBTDB(rbtdb));
4183         REQUIRE(type != dns_rdatatype_any);
4184
4185         UNUSED(version);
4186
4187         result = ISC_R_SUCCESS;
4188
4189         if (now == 0)
4190                 isc_stdtime_get(&now);
4191
4192         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
4193         locktype = isc_rwlocktype_read;
4194         NODE_LOCK(lock, locktype);
4195
4196         found = NULL;
4197         foundsig = NULL;
4198         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
4199         negtype = RBTDB_RDATATYPE_VALUE(0, type);
4200         if (covers == 0)
4201                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4202         else
4203                 sigmatchtype = 0;
4204
4205         for (header = rbtnode->data; header != NULL; header = header_next) {
4206                 header_next = header->next;
4207                 if (header->ttl <= now) {
4208                         if ((header->ttl <= now - RBTDB_VIRTUAL) &&
4209                             (locktype == isc_rwlocktype_write ||
4210                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4211                                 /*
4212                                  * We update the node's status only when we
4213                                  * can get write access.
4214                                  */
4215                                 locktype = isc_rwlocktype_write;
4216
4217                                 /*
4218                                  * We don't check if refcurrent(rbtnode) == 0
4219                                  * and try to free like we do in cache_find(),
4220                                  * because refcurrent(rbtnode) must be
4221                                  * non-zero.  This is so because 'node' is an
4222                                  * argument to the function.
4223                                  */
4224                                 header->attributes |= RDATASET_ATTR_STALE;
4225                                 rbtnode->dirty = 1;
4226                         }
4227                 } else if (EXISTS(header)) {
4228                         if (header->type == matchtype)
4229                                 found = header;
4230                         else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4231                                  header->type == negtype)
4232                                 found = header;
4233                         else if (header->type == sigmatchtype)
4234                                 foundsig = header;
4235                 }
4236         }
4237         if (found != NULL) {
4238                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
4239                 if (foundsig != NULL)
4240                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
4241                                       sigrdataset);
4242         }
4243
4244         NODE_UNLOCK(lock, locktype);
4245
4246         if (found == NULL)
4247                 return (ISC_R_NOTFOUND);
4248
4249         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
4250                 /*
4251                  * We found a negative cache entry.
4252                  */
4253                 if (NXDOMAIN(found))
4254                         result = DNS_R_NCACHENXDOMAIN;
4255                 else
4256                         result = DNS_R_NCACHENXRRSET;
4257         }
4258
4259         return (result);
4260 }
4261
4262 static isc_result_t
4263 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
4264              isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
4265 {
4266         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4267         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
4268         rbtdb_version_t *rbtversion = version;
4269         rbtdb_rdatasetiter_t *iterator;
4270         unsigned int refs;
4271
4272         REQUIRE(VALID_RBTDB(rbtdb));
4273
4274         iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
4275         if (iterator == NULL)
4276                 return (ISC_R_NOMEMORY);
4277
4278         if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
4279                 now = 0;
4280                 if (rbtversion == NULL)
4281                         currentversion(db,
4282                                  (dns_dbversion_t **) (void *)(&rbtversion));
4283                 else {
4284                         unsigned int refs;
4285
4286                         isc_refcount_increment(&rbtversion->references,
4287                                                &refs);
4288                         INSIST(refs > 1);
4289                 }
4290         } else {
4291                 if (now == 0)
4292                         isc_stdtime_get(&now);
4293                 rbtversion = NULL;
4294         }
4295
4296         iterator->common.magic = DNS_RDATASETITER_MAGIC;
4297         iterator->common.methods = &rdatasetiter_methods;
4298         iterator->common.db = db;
4299         iterator->common.node = node;
4300         iterator->common.version = (dns_dbversion_t *)rbtversion;
4301         iterator->common.now = now;
4302
4303         NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4304
4305         dns_rbtnode_refincrement(rbtnode, &refs);
4306         INSIST(refs != 0);
4307
4308         iterator->current = NULL;
4309
4310         NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4311
4312         *iteratorp = (dns_rdatasetiter_t *)iterator;
4313
4314         return (ISC_R_SUCCESS);
4315 }
4316
4317 static isc_boolean_t
4318 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
4319         rdatasetheader_t *header, *header_next;
4320         isc_boolean_t cname, other_data;
4321         dns_rdatatype_t rdtype;
4322
4323         /*
4324          * The caller must hold the node lock.
4325          */
4326
4327         /*
4328          * Look for CNAME and "other data" rdatasets active in our version.
4329          */
4330         cname = ISC_FALSE;
4331         other_data = ISC_FALSE;
4332         for (header = node->data; header != NULL; header = header_next) {
4333                 header_next = header->next;
4334                 if (header->type == dns_rdatatype_cname) {
4335                         /*
4336                          * Look for an active extant CNAME.
4337                          */
4338                         do {
4339                                 if (header->serial <= serial &&
4340                                     !IGNORE(header)) {
4341                                         /*
4342                                          * Is this a "this rdataset doesn't
4343                                          * exist" record?
4344                                          */
4345                                         if (NONEXISTENT(header))
4346                                                 header = NULL;
4347                                         break;
4348                                 } else
4349                                         header = header->down;
4350                         } while (header != NULL);
4351                         if (header != NULL)
4352                                 cname = ISC_TRUE;
4353                 } else {
4354                         /*
4355                          * Look for active extant "other data".
4356                          *
4357                          * "Other data" is any rdataset whose type is not
4358                          * KEY, RRSIG KEY, NSEC, RRSIG NSEC or RRSIG CNAME.
4359                          */
4360                         rdtype = RBTDB_RDATATYPE_BASE(header->type);
4361                         if (rdtype == dns_rdatatype_rrsig ||
4362                             rdtype == dns_rdatatype_sig)
4363                                 rdtype = RBTDB_RDATATYPE_EXT(header->type);
4364                         if (rdtype != dns_rdatatype_nsec &&
4365                             rdtype != dns_rdatatype_key &&
4366                             rdtype != dns_rdatatype_cname) {
4367                                 /*
4368                                  * We've found a type that isn't
4369                                  * NSEC, KEY, CNAME, or one of their
4370                                  * signatures.  Is it active and extant?
4371                                  */
4372                                 do {
4373                                         if (header->serial <= serial &&
4374                                             !IGNORE(header)) {
4375                                                 /*
4376                                                  * Is this a "this rdataset
4377                                                  * doesn't exist" record?
4378                                                  */
4379                                                 if (NONEXISTENT(header))
4380                                                         header = NULL;
4381                                                 break;
4382                                         } else
4383                                                 header = header->down;
4384                                 } while (header != NULL);
4385                                 if (header != NULL)
4386                                         other_data = ISC_TRUE;
4387                         }
4388                 }
4389         }
4390
4391         if (cname && other_data)
4392                 return (ISC_TRUE);
4393
4394         return (ISC_FALSE);
4395 }
4396
/*
 * Link 'newheader' into the rdataset chains of 'rbtnode'.
 *
 * The caller must hold the node lock.
 *
 * 'rbtversion' is NULL on the cache-style path (the negative-cache and
 * trust checks below apply) and non-NULL when the change is made in a
 * version.  'loading' suppresses the changed record and lets a
 * superseded header be freed immediately.
 *
 * 'options':
 *   DNS_DBADD_MERGE    (requires 'rbtversion') merge with the existing
 *                      data via dns_rdataslab_merge();
 *   DNS_DBADD_EXACT    pass DNS_RDATASLAB_EXACT to that merge;
 *   DNS_DBADD_EXACTTTL make a TTL mismatch fail the merge with
 *                      DNS_R_NOTEXACT;
 *   DNS_DBADD_FORCE    compare trust as if the new data had
 *                      dns_trust_ultimate.
 *
 * Ownership: on every return taken before the header is linked in,
 * 'newheader' is released with free_rdataset(); otherwise it (or the
 * merged slab that replaces it) becomes part of the node.
 *
 * If 'addedrdataset' is non-NULL it is bound to the existing header on
 * the paths that keep the existing data, and to the new header when the
 * function falls through to the final ISC_R_SUCCESS.  It is left
 * untouched on the remaining DNS_R_UNCHANGED returns, on errors and on
 * DNS_R_CNAMEANDOTHER.
 *
 * Returns ISC_R_SUCCESS, DNS_R_UNCHANGED, ISC_R_NOMEMORY, the failure
 * result of the merge (including DNS_R_NOTEXACT), or DNS_R_CNAMEANDOTHER.
 * NOTE: DNS_R_CNAMEANDOTHER is returned after the new header has already
 * been linked into the node.
 */
4397 static isc_result_t
4398 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
4399     rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
4400     dns_rdataset_t *addedrdataset, isc_stdtime_t now)
4401 {
4402         rbtdb_changed_t *changed = NULL;
4403         rdatasetheader_t *topheader, *topheader_prev, *header;
4404         unsigned char *merged;
4405         isc_result_t result;
4406         isc_boolean_t header_nx;
4407         isc_boolean_t newheader_nx;
4408         isc_boolean_t merge;
4409         dns_rdatatype_t rdtype, covers;
4410         rbtdb_rdatatype_t negtype;
4411         dns_trust_t trust;
4412
4413         /*
4414          * Add an rdatasetheader_t to a node.
4415          */
4416
4417         /*
4418          * Caller must be holding the node lock.
4419          */
4420
4421         if ((options & DNS_DBADD_MERGE) != 0) {
4422                 REQUIRE(rbtversion != NULL);
4423                 merge = ISC_TRUE;
4424         } else
4425                 merge = ISC_FALSE;
4426
4427         if ((options & DNS_DBADD_FORCE) != 0)
4428                 trust = dns_trust_ultimate;
4429         else
4430                 trust = newheader->trust;
4431
4432         if (rbtversion != NULL && !loading) {
4433                 /*
4434                  * We always add a changed record, even if no changes end up
4435                  * being made to this node, because it's harmless and
4436                  * simplifies the code.
4437                  */
4438                 changed = add_changed(rbtdb, rbtversion, rbtnode);
4439                 if (changed == NULL) {
4440                         free_rdataset(rbtdb->common.mctx, newheader);
4441                         return (ISC_R_NOMEMORY);
4442                 }
4443         }
4444
4445         newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
4446         topheader_prev = NULL;
4447
             /*
              * 'negtype', when set below, is a second type that the search
              * loop before find_header also accepts.  Presumably it is the
              * positive type when a negative entry is being added, and the
              * negative entry covering the type otherwise (depends on
              * RBTDB_RDATATYPE_VALUE's argument order -- TODO confirm).
              */
4448         negtype = 0;
4449         if (rbtversion == NULL && !newheader_nx) {
4450                 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
4451                 if (rdtype == 0) {
4452                         /*
4453                          * We're adding a negative cache entry.
4454                          */
4455                         covers = RBTDB_RDATATYPE_EXT(newheader->type);
4456                         if (covers == dns_rdatatype_any) {
4457                                 /*
4458                                  * We're adding a negative cache entry
4459                                  * which covers all types (NXDOMAIN,
4460                                  * NODATA(QTYPE=ANY)).
4461                                  *
4462                                  * We make all other data stale so that the
4463                                  * only rdataset that can be found at this
4464                                  * node is the negative cache entry.
4465                                  */
4466                                 for (topheader = rbtnode->data;
4467                                      topheader != NULL;
4468                                      topheader = topheader->next) {
4469                                         topheader->ttl = 0;
4470                                         topheader->attributes |=
4471                                                 RDATASET_ATTR_STALE;
4472                                 }
4473                                 rbtnode->dirty = 1;
                                     /*
                                      * The loop above ran to completion, so
                                      * topheader (and topheader_prev) are
                                      * NULL here: find_header will insert
                                      * the new header at the head of the
                                      * node's list.
                                      */
4474                                 goto find_header;
4475                         }
4476                         negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
4477                 } else {
4478                         /*
4479                          * We're adding something that isn't a
4480                          * negative cache entry.  Look for an extant
4481                          * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
4482                          * cache entry.
4483                          */
4484                         for (topheader = rbtnode->data;
4485                              topheader != NULL;
4486                              topheader = topheader->next) {
4487                                 if (topheader->type ==
4488                                     RBTDB_RDATATYPE_NCACHEANY)
4489                                         break;
4490                         }
4491                         if (topheader != NULL && EXISTS(topheader) &&
4492                             topheader->ttl > now) {
4493                                 /*
4494                                  * Found one.
4495                                  */
4496                                 if (trust < topheader->trust) {
4497                                         /*
4498                                          * The NXDOMAIN/NODATA(QTYPE=ANY)
4499                                          * is more trusted.
4500                                          */
4501
4502                                         free_rdataset(rbtdb->common.mctx,
4503                                                       newheader);
4504                                         if (addedrdataset != NULL)
4505                                                 bind_rdataset(rbtdb, rbtnode,
4506                                                               topheader, now,
4507                                                               addedrdataset);
4508                                         return (DNS_R_UNCHANGED);
4509                                 }
4510                                 /*
4511                                  * The new rdataset is better.  Expire the
4512                                  * NXDOMAIN/NODATA(QTYPE=ANY).
4513                                  */
4514                                 topheader->ttl = 0;
4515                                 topheader->attributes |= RDATASET_ATTR_STALE;
4516                                 rbtnode->dirty = 1;
                                     /*
                                      * Clearing topheader makes find_header
                                      * insert the new header at the head of
                                      * the node's list.
                                      */
4517                                 topheader = NULL;
4518                                 goto find_header;
4519                         }
4520                         negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
4521                 }
4522         }
4523
             /*
              * Find the top-level header of the type being added (or of
              * 'negtype'), remembering its predecessor so it can be unlinked.
              */
4524         for (topheader = rbtnode->data;
4525              topheader != NULL;
4526              topheader = topheader->next) {
4527                 if (topheader->type == newheader->type ||
4528                     topheader->type == negtype)
4529                         break;
4530                 topheader_prev = topheader;
4531         }
4532
4533  find_header:
4534         /*
4535          * If header isn't NULL, we've found the right type.  There may be
4536          * IGNORE rdatasets between the top of the chain and the first real
4537          * data.  We skip over them.
4538          */
4539         header = topheader;
4540         while (header != NULL && IGNORE(header))
4541                 header = header->down;
4542         if (header != NULL) {
4543                 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
4544
4545                 /*
4546                  * Deleting an already non-existent rdataset has no effect.
4547                  */
4548                 if (header_nx && newheader_nx) {
4549                         free_rdataset(rbtdb->common.mctx, newheader);
4550                         return (DNS_R_UNCHANGED);
4551                 }
4552
4553                 /*
4554                  * Trying to add an rdataset with lower trust to a cache DB
4555                  * has no effect, provided that the cache data isn't stale.
4556                  */
4557                 if (rbtversion == NULL && trust < header->trust &&
4558                     (header->ttl > now || header_nx)) {
4559                         free_rdataset(rbtdb->common.mctx, newheader);
4560                         if (addedrdataset != NULL)
4561                                 bind_rdataset(rbtdb, rbtnode, header, now,
4562                                               addedrdataset);
4563                         return (DNS_R_UNCHANGED);
4564                 }
4565
4566                 /*
4567                  * Don't merge if a nonexistent rdataset is involved.
4568                  */
4569                 if (merge && (header_nx || newheader_nx))
4570                         merge = ISC_FALSE;
4571
4572                 /*
4573                  * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
4574                  * that is the union of 'newheader' and 'header'.
4575                  */
4576                 if (merge) {
4577                         unsigned int flags = 0;
4578                         INSIST(rbtversion->serial >= header->serial);
4579                         merged = NULL;
4580                         result = ISC_R_SUCCESS;
4581
4582                         if ((options & DNS_DBADD_EXACT) != 0)
4583                                 flags |= DNS_RDATASLAB_EXACT;
4584                         if ((options & DNS_DBADD_EXACTTTL) != 0 &&
4585                              newheader->ttl != header->ttl)
4586                                         result = DNS_R_NOTEXACT;
4587                         else if (newheader->ttl != header->ttl)
4588                                 flags |= DNS_RDATASLAB_FORCE;
4589                         if (result == ISC_R_SUCCESS)
4590                                 result = dns_rdataslab_merge(
4591                                              (unsigned char *)header,
4592                                              (unsigned char *)newheader,
4593                                              (unsigned int)(sizeof(*newheader)),
4594                                              rbtdb->common.mctx,
4595                                              rbtdb->common.rdclass,
4596                                              (dns_rdatatype_t)header->type,
4597                                              flags, &merged);
4598                         if (result == ISC_R_SUCCESS) {
4599                                 /*
4600                                  * If 'header' has the same serial number as
4601                                  * we do, we could clean it up now if we knew
4602                                  * that our caller had no references to it.
4603                                  * We don't know this, however, so we leave it
4604                                  * alone.  It will get cleaned up when
4605                                  * clean_zone_node() runs.
4606                                  */
4607                                 free_rdataset(rbtdb->common.mctx, newheader);
4608                                 newheader = (rdatasetheader_t *)merged;
4609                         } else {
4610                                 free_rdataset(rbtdb->common.mctx, newheader);
4611                                 return (result);
4612                         }
4613                 }
4614                 /*
4615                  * Don't replace existing NS, A and AAAA RRsets
4616                  * in the cache if they already exist.  This
4617                  * prevents named being locked to old servers.
4618                  * Don't lower trust of existing record if the
4619                  * update is forced.
4620                  */
4621                 if (IS_CACHE(rbtdb) && header->ttl > now &&
4622                     header->type == dns_rdatatype_ns &&
4623                     !header_nx && !newheader_nx &&
4624                     header->trust >= newheader->trust &&
4625                     dns_rdataslab_equalx((unsigned char *)header,
4626                                          (unsigned char *)newheader,
4627                                          (unsigned int)(sizeof(*newheader)),
4628                                          rbtdb->common.rdclass,
4629                                          (dns_rdatatype_t)header->type)) {
4630                         /*
4631                          * Honour the new ttl if it is less than the
4632                          * older one.
4633                          */
4634                         if (header->ttl > newheader->ttl)
4635                                 header->ttl = newheader->ttl;
                             /*
                              * Move the noqname data across rather than
                              * copying it, so freeing 'newheader' below
                              * does not release it.
                              */
4636                         if (header->noqname == NULL &&
4637                             newheader->noqname != NULL) {
4638                                 header->noqname = newheader->noqname;
4639                                 newheader->noqname = NULL;
4640                         }
4641                         free_rdataset(rbtdb->common.mctx, newheader);
4642                         if (addedrdataset != NULL)
4643                                 bind_rdataset(rbtdb, rbtnode, header, now,
4644                                               addedrdataset);
4645                         return (ISC_R_SUCCESS);
4646                 }
                     /*
                      * Same treatment for A and AAAA; the only differences
                      * from the NS case are the type test and the use of
                      * dns_rdataslab_equal() instead of
                      * dns_rdataslab_equalx().
                      */
4647                 if (IS_CACHE(rbtdb) && header->ttl > now &&
4648                     (header->type == dns_rdatatype_a ||
4649                      header->type == dns_rdatatype_aaaa) &&
4650                     !header_nx && !newheader_nx &&
4651                     header->trust >= newheader->trust &&
4652                     dns_rdataslab_equal((unsigned char *)header,
4653                                         (unsigned char *)newheader,
4654                                         (unsigned int)(sizeof(*newheader)))) {
4655                         /*
4656                          * Honour the new ttl if it is less than the
4657                          * older one.
4658                          */
4659                         if (header->ttl > newheader->ttl)
4660                                 header->ttl = newheader->ttl;
4661                         if (header->noqname == NULL &&
4662                             newheader->noqname != NULL) {
4663                                 header->noqname = newheader->noqname;
4664                                 newheader->noqname = NULL;
4665                         }
4666                         free_rdataset(rbtdb->common.mctx, newheader);
4667                         if (addedrdataset != NULL)
4668                                 bind_rdataset(rbtdb, rbtnode, header, now,
4669                                               addedrdataset);
4670                         return (ISC_R_SUCCESS);
4671                 }
4672                 INSIST(rbtversion == NULL ||
4673                        rbtversion->serial >= topheader->serial);
                     /*
                      * Splice 'newheader' into the node's top-level list in
                      * the position currently held by 'topheader'.
                      */
4674                 if (topheader_prev != NULL)
4675                         topheader_prev->next = newheader;
4676                 else
4677                         rbtnode->data = newheader;
4678                 newheader->next = topheader->next;
4679                 if (loading) {
4680                         /*
4681                          * There are no other references to 'header' when
4682                          * loading, so we MAY clean up 'header' now.
4683                          * Since we don't generate changed records when
4684                          * loading, we MUST clean up 'header' now.
4685                          */
                             /*
                              * NOTE(review): 'header' (not 'topheader') is
                              * freed.  They are the same object only when no
                              * IGNOREd headers were skipped, which a comment
                              * further down says cannot happen while loading.
                              */
4686                         newheader->down = NULL;
4687                         free_rdataset(rbtdb->common.mctx, header);
4688                 } else {
                             /*
                              * Keep the superseded chain reachable through
                              * 'down', and point the old top header's 'next'
                              * at the header that replaced it.
                              */
4689                         newheader->down = topheader;
4690                         topheader->next = newheader;
4691                         rbtnode->dirty = 1;
4692                         if (changed != NULL)
4693                                 changed->dirty = ISC_TRUE;
4694                         if (rbtversion == NULL) {
4695                                 header->ttl = 0;
4696                                 header->attributes |= RDATASET_ATTR_STALE;
4697                         }
4698                 }
4699         } else {
4700                 /*
4701                  * No non-IGNORED rdatasets of the given type exist at
4702                  * this node.
4703                  */
4704
4705                 /*
4706                  * If we're trying to delete the type, don't bother.
4707                  */
4708                 if (newheader_nx) {
4709                         free_rdataset(rbtdb->common.mctx, newheader);
4710                         return (DNS_R_UNCHANGED);
4711                 }
4712
4713                 if (topheader != NULL) {
4714                         /*
4715                          * We have a list of rdatasets of the given type,
4716                          * but they're all marked IGNORE.  We simply insert
4717                          * the new rdataset at the head of the list.
4718                          *
4719                          * Ignored rdatasets cannot occur during loading, so
4720                          * we INSIST on it.
4721                          */
4722                         INSIST(!loading);
4723                         INSIST(rbtversion == NULL ||
4724                                rbtversion->serial >= topheader->serial);
4725                         if (topheader_prev != NULL)
4726                                 topheader_prev->next = newheader;
4727                         else
4728                                 rbtnode->data = newheader;
4729                         newheader->next = topheader->next;
4730                         newheader->down = topheader;
4731                         topheader->next = newheader;
4732                         rbtnode->dirty = 1;
4733                         if (changed != NULL)
4734                                 changed->dirty = ISC_TRUE;
4735                 } else {
4736                         /*
4737                          * No rdatasets of the given type exist at the node.
4738                          */
4739                         newheader->next = rbtnode->data;
4740                         newheader->down = NULL;
4741                         rbtnode->data = newheader;
4742                 }
4743         }
4744
4745         /*
4746          * Check if the node now contains CNAME and other data.
4747          */
             /*
              * Note that 'newheader' is already linked into the node when
              * this check fails; it is neither unlinked nor freed here.
              */
4748         if (rbtversion != NULL &&
4749             cname_and_other_data(rbtnode, rbtversion->serial))
4750                 return (DNS_R_CNAMEANDOTHER);
4751
4752         if (addedrdataset != NULL)
4753                 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
4754
4755         return (ISC_R_SUCCESS);
4756 }
4757
4758 static inline isc_boolean_t
4759 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
4760                 rbtdb_rdatatype_t type)
4761 {
4762         if (IS_CACHE(rbtdb)) {
4763                 if (type == dns_rdatatype_dname)
4764                         return (ISC_TRUE);
4765                 else
4766                         return (ISC_FALSE);
4767         } else if (type == dns_rdatatype_dname ||
4768                    (type == dns_rdatatype_ns &&
4769                     (node != rbtdb->origin_node || IS_STUB(rbtdb))))
4770                 return (ISC_TRUE);
4771         return (ISC_FALSE);
4772 }
4773
4774 static inline isc_result_t
4775 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
4776            dns_rdataset_t *rdataset)
4777 {
4778         struct noqname *noqname;
4779         isc_mem_t *mctx = rbtdb->common.mctx;
4780         dns_name_t name;
4781         dns_rdataset_t nsec, nsecsig;
4782         isc_result_t result;
4783         isc_region_t r;
4784
4785         dns_name_init(&name, NULL);
4786         dns_rdataset_init(&nsec);
4787         dns_rdataset_init(&nsecsig);
4788
4789         result = dns_rdataset_getnoqname(rdataset, &name, &nsec, &nsecsig);
4790         RUNTIME_CHECK(result == ISC_R_SUCCESS);
4791
4792         noqname = isc_mem_get(mctx, sizeof(*noqname));
4793         if (noqname == NULL) {
4794                 result = ISC_R_NOMEMORY;
4795                 goto cleanup;
4796         }
4797         dns_name_init(&noqname->name, NULL);
4798         noqname->nsec = NULL;
4799         noqname->nsecsig = NULL;
4800         result = dns_name_dup(&name, mctx, &noqname->name);
4801         if (result != ISC_R_SUCCESS)
4802                 goto cleanup;
4803         result = dns_rdataslab_fromrdataset(&nsec, mctx, &r, 0);
4804         if (result != ISC_R_SUCCESS)
4805                 goto cleanup;
4806         noqname->nsec = r.base;
4807         result = dns_rdataslab_fromrdataset(&nsecsig, mctx, &r, 0);
4808         if (result != ISC_R_SUCCESS)
4809                 goto cleanup;
4810         noqname->nsecsig = r.base;
4811         dns_rdataset_disassociate(&nsec);
4812         dns_rdataset_disassociate(&nsecsig);
4813         newheader->noqname = noqname;
4814         return (ISC_R_SUCCESS);
4815
4816 cleanup:
4817         dns_rdataset_disassociate(&nsec);
4818         dns_rdataset_disassociate(&nsecsig);
4819         free_noqname(mctx, &noqname);
4820         return(result);
4821 }
4822
4823 static isc_result_t
4824 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
4825             isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
4826             dns_rdataset_t *addedrdataset)
4827 {
4828         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4829         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
4830         rbtdb_version_t *rbtversion = version;
4831         isc_region_t region;
4832         rdatasetheader_t *newheader;
4833         isc_result_t result;
4834         isc_boolean_t delegating;
4835
4836         REQUIRE(VALID_RBTDB(rbtdb));
4837
4838         if (rbtversion == NULL) {
4839                 if (now == 0)
4840                         isc_stdtime_get(&now);
4841         } else
4842                 now = 0;
4843
4844         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
4845                                             &region,
4846                                             sizeof(rdatasetheader_t));
4847         if (result != ISC_R_SUCCESS)
4848                 return (result);
4849
4850         newheader = (rdatasetheader_t *)region.base;
4851         newheader->ttl = rdataset->ttl + now;
4852         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
4853                                                 rdataset->covers);
4854         newheader->attributes = 0;
4855         newheader->noqname = NULL;
4856         newheader->count = init_count++;
4857         newheader->trust = rdataset->trust;
4858         newheader->additional_auth = NULL;
4859         newheader->additional_glue = NULL;
4860         if (rbtversion != NULL) {
4861                 newheader->serial = rbtversion->serial;
4862                 now = 0;
4863         } else {
4864                 newheader->serial = 1;
4865                 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
4866                         newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
4867                 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
4868                         result = addnoqname(rbtdb, newheader, rdataset);
4869                         if (result != ISC_R_SUCCESS) {
4870                                 free_rdataset(rbtdb->common.mctx, newheader);
4871                                 return (result);
4872                         }
4873                 }
4874         }
4875
4876         /*
4877          * If we're adding a delegation type (e.g. NS or DNAME for a zone,
4878          * just DNAME for the cache), then we need to set the callback bit
4879          * on the node, and to do that we must be holding an exclusive lock
4880          * on the tree.
4881          */
4882         if (delegating_type(rbtdb, rbtnode, rdataset->type)) {
4883                 delegating = ISC_TRUE;
4884                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
4885         } else
4886                 delegating = ISC_FALSE;
4887
4888         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4889                   isc_rwlocktype_write);
4890
4891         result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
4892                      addedrdataset, now);
4893         if (result == ISC_R_SUCCESS && delegating)
4894                 rbtnode->find_callback = 1;
4895
4896         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4897                     isc_rwlocktype_write);
4898
4899         if (delegating)
4900                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
4901
4902         /*
4903          * Update the zone's secure status.  If version is non-NULL
4904          * this is defered until closeversion() is called.
4905          */
4906         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
4907                 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
4908
4909         return (result);
4910 }
4911
/*
 * Subtract the rdata in 'rdataset' from the rdataset of the same type at
 * 'node', on behalf of 'version'.
 *
 * 'rdataset' is first converted into a slab.  Under the node write lock the
 * node's header chain is searched for a matching type; if existing data is
 * found, dns_rdataslab_subtract() computes the remainder and a new header
 * carrying it is linked in front of the old one.  If the subtraction would
 * remove everything, a header marked RDATASET_ATTR_NONEXISTENT is linked
 * instead.
 *
 * 'options' may contain DNS_DBSUB_EXACT, in which case the TTLs must match
 * and the slab subtraction is asked for an exact match
 * (DNS_RDATASLAB_EXACT).
 *
 * If 'newrdataset' is non-NULL and the result is ISC_R_SUCCESS it is bound
 * to the newly linked header.
 *
 * Returns:
 *      ISC_R_SUCCESS     the remainder was stored.
 *      DNS_R_NXRRSET     everything was removed; a NONEXISTENT header was
 *                        linked (this result is returned as-is).
 *      DNS_R_UNCHANGED   no existing rdataset of that type (non-exact mode).
 *      DNS_R_NOTEXACT    exact mode and no existing rdataset or TTL mismatch.
 *      ISC_R_NOMEMORY    add_changed() or isc_mem_get() failed.
 *      Any other failure from dns_rdataslab_fromrdataset() or
 *      dns_rdataslab_subtract().
 */
static isc_result_t
subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
                 dns_rdataset_t *rdataset, unsigned int options,
                 dns_rdataset_t *newrdataset)
{
        dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
        dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
        rbtdb_version_t *rbtversion = version;
        rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
        unsigned char *subresult;
        isc_region_t region;
        isc_result_t result;
        rbtdb_changed_t *changed;

        REQUIRE(VALID_RBTDB(rbtdb));

        /*
         * Build a slab from the caller's rdataset, reserving
         * sizeof(rdatasetheader_t) bytes at the front; region.base is then
         * used as the header of the data to be subtracted.
         */
        result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
                                            &region,
                                            sizeof(rdatasetheader_t));
        if (result != ISC_R_SUCCESS)
                return (result);
        newheader = (rdatasetheader_t *)region.base;
        newheader->ttl = rdataset->ttl;
        newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
                                                rdataset->covers);
        newheader->attributes = 0;
        /*
         * NOTE(review): rbtversion is dereferenced unconditionally here, so
         * this function effectively requires a non-NULL 'version'.
         */
        newheader->serial = rbtversion->serial;
        newheader->trust = 0;
        newheader->noqname = NULL;
        newheader->count = init_count++;
        newheader->additional_auth = NULL;
        newheader->additional_glue = NULL;

        NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
                  isc_rwlocktype_write);

        /*
         * Register the node as changed in this version.  A NULL return is
         * reported as an out-of-memory condition.
         */
        changed = add_changed(rbtdb, rbtversion, rbtnode);
        if (changed == NULL) {
                free_rdataset(rbtdb->common.mctx, newheader);
                NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
                            isc_rwlocktype_write);
                return (ISC_R_NOMEMORY);
        }

        /*
         * Walk the top-level ('next') chain looking for the header of the
         * type being modified, remembering its predecessor so the new
         * header can be spliced in at the same position.
         */
        topheader_prev = NULL;
        for (topheader = rbtnode->data;
             topheader != NULL;
             topheader = topheader->next) {
                if (topheader->type == newheader->type)
                        break;
                topheader_prev = topheader;
        }
        /*
         * If header isn't NULL, we've found the right type.  There may be
         * IGNORE rdatasets between the top of the chain and the first real
         * data.  We skip over them.
         */
        header = topheader;
        while (header != NULL && IGNORE(header))
                header = header->down;
        if (header != NULL && EXISTS(header)) {
                unsigned int flags = 0;
                subresult = NULL;
                result = ISC_R_SUCCESS;
                /*
                 * In exact mode a TTL mismatch fails the request before
                 * any subtraction is attempted.
                 */
                if ((options & DNS_DBSUB_EXACT) != 0) {
                        flags |= DNS_RDATASLAB_EXACT;
                        if (newheader->ttl != header->ttl)
                                result = DNS_R_NOTEXACT;
                }
                if (result == ISC_R_SUCCESS)
                        result = dns_rdataslab_subtract(
                                        (unsigned char *)header,
                                        (unsigned char *)newheader,
                                        (unsigned int)(sizeof(*newheader)),
                                        rbtdb->common.mctx,
                                        rbtdb->common.rdclass,
                                        (dns_rdatatype_t)header->type,
                                        flags, &subresult);
                if (result == ISC_R_SUCCESS) {
                        /*
                         * The slab holding the rdata to subtract is no
                         * longer needed; the subtraction result becomes
                         * the new header.
                         */
                        free_rdataset(rbtdb->common.mctx, newheader);
                        newheader = (rdatasetheader_t *)subresult;
                        /*
                         * We have to set the serial since the rdataslab
                         * subtraction routine copies the reserved portion of
                         * header, not newheader.
                         */
                        newheader->serial = rbtversion->serial;
                        /*
                         * XXXJT: dns_rdataslab_subtract() copied the pointers
                         * to additional info.  We need to clear these fields
                         * to avoid having duplicated references.
                         */
                        newheader->additional_auth = NULL;
                        newheader->additional_glue = NULL;
                } else if (result == DNS_R_NXRRSET) {
                        /*
                         * This subtraction would remove all of the rdata;
                         * add a nonexistent header instead.
                         *
                         * 'result' stays DNS_R_NXRRSET, so the header is
                         * linked below but 'newrdataset' is not bound and
                         * DNS_R_NXRRSET is what the caller sees.
                         */
                        free_rdataset(rbtdb->common.mctx, newheader);
                        newheader = isc_mem_get(rbtdb->common.mctx,
                                                sizeof(*newheader));
                        if (newheader == NULL) {
                                result = ISC_R_NOMEMORY;
                                goto unlock;
                        }
                        newheader->ttl = 0;
                        newheader->type = topheader->type;
                        newheader->attributes = RDATASET_ATTR_NONEXISTENT;
                        newheader->trust = 0;
                        newheader->serial = rbtversion->serial;
                        newheader->noqname = NULL;
                        newheader->count = 0;
                        newheader->additional_auth = NULL;
                        newheader->additional_glue = NULL;
                } else {
                        /*
                         * Any other failure (including DNS_R_NOTEXACT from
                         * the TTL check above): nothing is linked.
                         */
                        free_rdataset(rbtdb->common.mctx, newheader);
                        goto unlock;
                }

                /*
                 * If we're here, we want to link newheader in front of
                 * topheader.
                 */
                INSIST(rbtversion->serial >= topheader->serial);
                if (topheader_prev != NULL)
                        topheader_prev->next = newheader;
                else
                        rbtnode->data = newheader;
                newheader->next = topheader->next;
                newheader->down = topheader;
                /*
                 * NOTE(review): the superseded header's 'next' is pointed
                 * at its replacement; presumably this lets holders of the
                 * old header find the live chain -- confirm against add().
                 */
                topheader->next = newheader;
                rbtnode->dirty = 1;
                changed->dirty = ISC_TRUE;
        } else {
                /*
                 * The rdataset doesn't exist, so we don't need to do anything
                 * to satisfy the deletion request.
                 */
                free_rdataset(rbtdb->common.mctx, newheader);
                if ((options & DNS_DBSUB_EXACT) != 0)
                        result = DNS_R_NOTEXACT;
                else
                        result = DNS_R_UNCHANGED;
        }

        if (result == ISC_R_SUCCESS && newrdataset != NULL)
                bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);

 unlock:
        NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
                    isc_rwlocktype_write);

        /*
         * Update the zone's secure status.  If version is non-NULL
         * this is deferred until closeversion() is called.
         *
         * NOTE(review): 'version' has already been dereferenced above, so
         * the version == NULL case cannot be reached without crashing
         * earlier.
         */
        if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
                rbtdb->secure = iszonesecure(db, rbtdb->origin_node);

        return (result);
}
5074
5075 static isc_result_t
5076 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5077                dns_rdatatype_t type, dns_rdatatype_t covers)
5078 {
5079         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5080         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5081         rbtdb_version_t *rbtversion = version;
5082         isc_result_t result;
5083         rdatasetheader_t *newheader;
5084
5085         REQUIRE(VALID_RBTDB(rbtdb));
5086
5087         if (type == dns_rdatatype_any)
5088                 return (ISC_R_NOTIMPLEMENTED);
5089         if (type == dns_rdatatype_rrsig && covers == 0)
5090                 return (ISC_R_NOTIMPLEMENTED);
5091
5092         newheader = isc_mem_get(rbtdb->common.mctx, sizeof(*newheader));
5093         if (newheader == NULL)
5094                 return (ISC_R_NOMEMORY);
5095         newheader->ttl = 0;
5096         newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
5097         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
5098         newheader->trust = 0;
5099         newheader->noqname = NULL;
5100         newheader->additional_auth = NULL;
5101         newheader->additional_glue = NULL;
5102         if (rbtversion != NULL)
5103                 newheader->serial = rbtversion->serial;
5104         else
5105                 newheader->serial = 0;
5106         newheader->count = 0;
5107
5108         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5109                   isc_rwlocktype_write);
5110
5111         result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
5112                      ISC_FALSE, NULL, 0);
5113
5114         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5115                     isc_rwlocktype_write);
5116
5117         /*
5118          * Update the zone's secure status.  If version is non-NULL
5119          * this is defered until closeversion() is called.
5120          */
5121         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
5122                 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
5123
5124         return (result);
5125 }
5126
5127 static isc_result_t
5128 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
5129         rbtdb_load_t *loadctx = arg;
5130         dns_rbtdb_t *rbtdb = loadctx->rbtdb;
5131         dns_rbtnode_t *node;
5132         isc_result_t result;
5133         isc_region_t region;
5134         rdatasetheader_t *newheader;
5135
5136         /*
5137          * This routine does no node locking.  See comments in
5138          * 'load' below for more information on loading and
5139          * locking.
5140          */
5141
5142
5143         /*
5144          * SOA records are only allowed at top of zone.
5145          */
5146         if (rdataset->type == dns_rdatatype_soa &&
5147             !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
5148                 return (DNS_R_NOTZONETOP);
5149
5150         add_empty_wildcards(rbtdb, name);
5151
5152         if (dns_name_iswildcard(name)) {
5153                 /*
5154                  * NS record owners cannot legally be wild cards.
5155                  */
5156                 if (rdataset->type == dns_rdatatype_ns)
5157                         return (DNS_R_INVALIDNS);
5158                 result = add_wildcard_magic(rbtdb, name);
5159                 if (result != ISC_R_SUCCESS)
5160                         return (result);
5161         }
5162
5163         node = NULL;
5164         result = dns_rbt_addnode(rbtdb->tree, name, &node);
5165         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
5166                 return (result);
5167         if (result != ISC_R_EXISTS) {
5168                 dns_name_t foundname;
5169                 dns_name_init(&foundname, NULL);
5170                 dns_rbt_namefromnode(node, &foundname);
5171 #ifdef DNS_RBT_USEHASH
5172                 node->locknum = node->hashval % rbtdb->node_lock_count;
5173 #else
5174                 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
5175                         rbtdb->node_lock_count;
5176 #endif
5177         }
5178
5179         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
5180                                             &region,
5181                                             sizeof(rdatasetheader_t));
5182         if (result != ISC_R_SUCCESS)
5183                 return (result);
5184         newheader = (rdatasetheader_t *)region.base;
5185         newheader->ttl = rdataset->ttl + loadctx->now; /* XXX overflow check */
5186         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
5187                                                 rdataset->covers);
5188         newheader->attributes = 0;
5189         newheader->trust = rdataset->trust;
5190         newheader->serial = 1;
5191         newheader->noqname = NULL;
5192         newheader->count = init_count++;
5193         newheader->additional_auth = NULL;
5194         newheader->additional_glue = NULL;
5195
5196         result = add(rbtdb, node, rbtdb->current_version, newheader,
5197                      DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
5198         if (result == ISC_R_SUCCESS &&
5199             delegating_type(rbtdb, node, rdataset->type))
5200                 node->find_callback = 1;
5201         else if (result == DNS_R_UNCHANGED)
5202                 result = ISC_R_SUCCESS;
5203
5204         return (result);
5205 }
5206
5207 static isc_result_t
5208 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
5209         rbtdb_load_t *loadctx;
5210         dns_rbtdb_t *rbtdb;
5211
5212         rbtdb = (dns_rbtdb_t *)db;
5213
5214         REQUIRE(VALID_RBTDB(rbtdb));
5215
5216         loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
5217         if (loadctx == NULL)
5218                 return (ISC_R_NOMEMORY);
5219
5220         loadctx->rbtdb = rbtdb;
5221         if (IS_CACHE(rbtdb))
5222                 isc_stdtime_get(&loadctx->now);
5223         else
5224                 loadctx->now = 0;
5225
5226         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5227
5228         REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
5229                 == 0);
5230         rbtdb->attributes |= RBTDB_ATTR_LOADING;
5231
5232         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
5233
5234         *addp = loading_addrdataset;
5235         *dbloadp = loadctx;
5236
5237         return (ISC_R_SUCCESS);
5238 }
5239
5240 static isc_result_t
5241 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
5242         rbtdb_load_t *loadctx;
5243         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5244
5245         REQUIRE(VALID_RBTDB(rbtdb));
5246         REQUIRE(dbloadp != NULL);
5247         loadctx = *dbloadp;
5248         REQUIRE(loadctx->rbtdb == rbtdb);
5249
5250         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5251
5252         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
5253         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
5254
5255         rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
5256         rbtdb->attributes |= RBTDB_ATTR_LOADED;
5257
5258         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
5259
5260         /*
5261          * If there's a KEY rdataset at the zone origin containing a
5262          * zone key, we consider the zone secure.
5263          */
5264         if (! IS_CACHE(rbtdb))
5265                 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
5266
5267         *dbloadp = NULL;
5268
5269         isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
5270
5271         return (ISC_R_SUCCESS);
5272 }
5273
5274 static isc_result_t
5275 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
5276      dns_masterformat_t masterformat) {
5277         dns_rbtdb_t *rbtdb;
5278
5279         rbtdb = (dns_rbtdb_t *)db;
5280
5281         REQUIRE(VALID_RBTDB(rbtdb));
5282
5283         return (dns_master_dump2(rbtdb->common.mctx, db, version,
5284                                  &dns_master_style_default,
5285                                  filename, masterformat));
5286 }
5287
5288 static void
5289 delete_callback(void *data, void *arg) {
5290         dns_rbtdb_t *rbtdb = arg;
5291         rdatasetheader_t *current, *next;
5292
5293         for (current = data; current != NULL; current = next) {
5294                 next = current->next;
5295                 free_rdataset(rbtdb->common.mctx, current);
5296         }
5297 }
5298
5299 static isc_boolean_t
5300 issecure(dns_db_t *db) {
5301         dns_rbtdb_t *rbtdb;
5302         isc_boolean_t secure;
5303
5304         rbtdb = (dns_rbtdb_t *)db;
5305
5306         REQUIRE(VALID_RBTDB(rbtdb));
5307
5308         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5309         secure = rbtdb->secure;
5310         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5311
5312         return (secure);
5313 }
5314
5315 static unsigned int
5316 nodecount(dns_db_t *db) {
5317         dns_rbtdb_t *rbtdb;
5318         unsigned int count;
5319
5320         rbtdb = (dns_rbtdb_t *)db;
5321
5322         REQUIRE(VALID_RBTDB(rbtdb));
5323
5324         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5325         count = dns_rbt_nodecount(rbtdb->tree);
5326         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5327
5328         return (count);
5329 }
5330
5331 static void
5332 settask(dns_db_t *db, isc_task_t *task) {
5333         dns_rbtdb_t *rbtdb;
5334
5335         rbtdb = (dns_rbtdb_t *)db;
5336
5337         REQUIRE(VALID_RBTDB(rbtdb));
5338
5339         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5340         if (rbtdb->task != NULL)
5341                 isc_task_detach(&rbtdb->task);
5342         if (task != NULL)
5343                 isc_task_attach(task, &rbtdb->task);
5344         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
5345 }
5346
5347 static isc_boolean_t
5348 ispersistent(dns_db_t *db) {
5349         UNUSED(db);
5350         return (ISC_FALSE);
5351 }
5352
5353 static isc_result_t
5354 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
5355         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5356         dns_rbtnode_t *onode;
5357         isc_result_t result = ISC_R_SUCCESS;
5358
5359         REQUIRE(VALID_RBTDB(rbtdb));
5360         REQUIRE(nodep != NULL && *nodep == NULL);
5361
5362         /* Note that the access to origin_node doesn't require a DB lock */
5363         onode = (dns_rbtnode_t *)rbtdb->origin_node;
5364         if (onode != NULL) {
5365                 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
5366                 new_reference(rbtdb, onode);
5367                 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
5368
5369                 *nodep = rbtdb->origin_node;
5370         } else {
5371                 INSIST(!IS_CACHE(rbtdb));
5372                 result = ISC_R_NOTFOUND;
5373         }
5374
5375         return (result);
5376 }
5377
/*
 * Method table installed as 'common.methods' by the database create
 * function for zone and stub databases.
 *
 * The initializer is positional, so the order of the entries must match
 * the member order of dns_dbmethods_t (declared outside this file).  It
 * differs from cache_methods only in the find, findzonecut and
 * findrdataset entries.
 */
static dns_dbmethods_t zone_methods = {
        attach,
        detach,
        beginload,
        endload,
        dump,
        currentversion,
        newversion,
        attachversion,
        closeversion,
        findnode,
        zone_find,              /* zone-specific */
        zone_findzonecut,       /* zone-specific */
        attachnode,
        detachnode,
        expirenode,
        printnode,
        createiterator,
        zone_findrdataset,      /* zone-specific */
        allrdatasets,
        addrdataset,
        subtractrdataset,
        deleterdataset,
        issecure,
        nodecount,
        ispersistent,
        overmem,
        settask,
        getoriginnode
};
5408
/*
 * Method table installed as 'common.methods' by the database create
 * function for cache databases.
 *
 * The initializer is positional, so the order of the entries must match
 * the member order of dns_dbmethods_t (declared outside this file).  It
 * differs from zone_methods only in the find, findzonecut and
 * findrdataset entries.
 */
static dns_dbmethods_t cache_methods = {
        attach,
        detach,
        beginload,
        endload,
        dump,
        currentversion,
        newversion,
        attachversion,
        closeversion,
        findnode,
        cache_find,             /* cache-specific */
        cache_findzonecut,      /* cache-specific */
        attachnode,
        detachnode,
        expirenode,
        printnode,
        createiterator,
        cache_findrdataset,     /* cache-specific */
        allrdatasets,
        addrdataset,
        subtractrdataset,
        deleterdataset,
        issecure,
        nodecount,
        ispersistent,
        overmem,
        settask,
        getoriginnode
};
5439
5440 isc_result_t
5441 #ifdef DNS_RBTDB_VERSION64
5442 dns_rbtdb64_create
5443 #else
5444 dns_rbtdb_create
5445 #endif
5446                 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
5447                  dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
5448                  void *driverarg, dns_db_t **dbp)
5449 {
5450         dns_rbtdb_t *rbtdb;
5451         isc_result_t result;
5452         int i;
5453         dns_name_t name;
5454
5455         /* Keep the compiler happy. */
5456         UNUSED(argc);
5457         UNUSED(argv);
5458         UNUSED(driverarg);
5459
5460         rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
5461         if (rbtdb == NULL)
5462                 return (ISC_R_NOMEMORY);
5463
5464         memset(rbtdb, '\0', sizeof(*rbtdb));
5465         dns_name_init(&rbtdb->common.origin, NULL);
5466         rbtdb->common.attributes = 0;
5467         if (type == dns_dbtype_cache) {
5468                 rbtdb->common.methods = &cache_methods;
5469                 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
5470         } else if (type == dns_dbtype_stub) {
5471                 rbtdb->common.methods = &zone_methods;
5472                 rbtdb->common.attributes |= DNS_DBATTR_STUB;
5473         } else
5474                 rbtdb->common.methods = &zone_methods;
5475         rbtdb->common.rdclass = rdclass;
5476         rbtdb->common.mctx = NULL;
5477
5478         result = RBTDB_INITLOCK(&rbtdb->lock);
5479         if (result != ISC_R_SUCCESS)
5480                 goto cleanup_rbtdb;
5481
5482         result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
5483         if (result != ISC_R_SUCCESS)
5484                 goto cleanup_lock;
5485
5486         if (rbtdb->node_lock_count == 0) {
5487                 if (IS_CACHE(rbtdb))
5488                         rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
5489                 else
5490                         rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
5491         }
5492         INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
5493         rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
5494                                         sizeof(rbtdb_nodelock_t));
5495         if (rbtdb->node_locks == NULL) {
5496                 result = ISC_R_NOMEMORY;
5497                 goto cleanup_tree_lock;
5498         }
5499
5500         rbtdb->active = rbtdb->node_lock_count;
5501
5502         for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
5503                 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
5504                 if (result == ISC_R_SUCCESS) {
5505                         result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
5506                         if (result != ISC_R_SUCCESS)
5507                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
5508                 }
5509                 if (result != ISC_R_SUCCESS) {
5510                         while (i-- > 0) {
5511                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
5512                                 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
5513                                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
5514                         }
5515                         goto cleanup_node_locks;
5516                 }
5517                 rbtdb->node_locks[i].exiting = ISC_FALSE;
5518         }
5519
5520         /*
5521          * Attach to the mctx.  The database will persist so long as there
5522          * are references to it, and attaching to the mctx ensures that our
5523          * mctx won't disappear out from under us.
5524          */
5525         isc_mem_attach(mctx, &rbtdb->common.mctx);
5526
5527         /*
5528          * Must be initalized before free_rbtdb() is called.
5529          */
5530         isc_ondestroy_init(&rbtdb->common.ondest);
5531
5532         /*
5533          * Make a copy of the origin name.
5534          */
5535         result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
5536         if (result != ISC_R_SUCCESS) {
5537                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
5538                 return (result);
5539         }
5540
5541         /*
5542          * Make the Red-Black Tree.
5543          */
5544         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
5545         if (result != ISC_R_SUCCESS) {
5546                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
5547                 return (result);
5548         }
5549         /*
5550          * In order to set the node callback bit correctly in zone databases,
5551          * we need to know if the node has the origin name of the zone.
5552          * In loading_addrdataset() we could simply compare the new name
5553          * to the origin name, but this is expensive.  Also, we don't know the
5554          * node name in addrdataset(), so we need another way of knowing the
5555          * zone's top.
5556          *
5557          * We now explicitly create a node for the zone's origin, and then
5558          * we simply remember the node's address.  This is safe, because
5559          * the top-of-zone node can never be deleted, nor can its address
5560          * change.
5561          */
5562         if (!IS_CACHE(rbtdb)) {
5563                 rbtdb->origin_node = NULL;
5564                 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
5565                                          &rbtdb->origin_node);
5566                 if (result != ISC_R_SUCCESS) {
5567                         INSIST(result != ISC_R_EXISTS);
5568                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
5569                         return (result);
5570                 }
5571                 /*
5572                  * We need to give the origin node the right locknum.
5573                  */
5574                 dns_name_init(&name, NULL);
5575                 dns_rbt_namefromnode(rbtdb->origin_node, &name);
5576 #ifdef DNS_RBT_USEHASH
5577                 rbtdb->origin_node->locknum =
5578                         rbtdb->origin_node->hashval %
5579                         rbtdb->node_lock_count;
5580 #else
5581                 rbtdb->origin_node->locknum =
5582                         dns_name_hash(&name, ISC_TRUE) %
5583                         rbtdb->node_lock_count;
5584 #endif
5585         }
5586
5587         /*
5588          * Misc. Initialization.
5589          */
5590         result = isc_refcount_init(&rbtdb->references, 1);
5591         if (result != ISC_R_SUCCESS) {
5592                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
5593                 return (result);
5594         }
5595         rbtdb->attributes = 0;
5596         rbtdb->secure = ISC_FALSE;
5597         rbtdb->overmem = ISC_FALSE;
5598         rbtdb->task = NULL;
5599
5600         /*
5601          * Version Initialization.
5602          */
5603         rbtdb->current_serial = 1;
5604         rbtdb->least_serial = 1;
5605         rbtdb->next_serial = 2;
5606         rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
5607         if (rbtdb->current_version == NULL) {
5608                 isc_refcount_decrement(&rbtdb->references, NULL);
5609                 isc_refcount_destroy(&rbtdb->references);
5610                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
5611                 return (ISC_R_NOMEMORY);
5612         }
5613         rbtdb->future_version = NULL;
5614         ISC_LIST_INIT(rbtdb->open_versions);
5615         /*
5616          * Keep the current version in the open list so that list operation
5617          * won't happen in normal lookup operations.
5618          */
5619         PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
5620
5621         rbtdb->common.magic = DNS_DB_MAGIC;
5622         rbtdb->common.impmagic = RBTDB_MAGIC;
5623
5624         *dbp = (dns_db_t *)rbtdb;
5625
5626         return (ISC_R_SUCCESS);
5627
5628  cleanup_node_locks:
5629         isc_mem_put(mctx, rbtdb->node_locks,
5630                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
5631
5632  cleanup_tree_lock:
5633         isc_rwlock_destroy(&rbtdb->tree_lock);
5634
5635  cleanup_lock:
5636         RBTDB_DESTROYLOCK(&rbtdb->lock);
5637
5638  cleanup_rbtdb:
5639         isc_mem_put(mctx, rbtdb,  sizeof(*rbtdb));
5640         return (result);
5641 }
5642
5643
5644 /*
5645  * Slabbed Rdataset Methods
5646  */
5647
5648 static void
5649 rdataset_disassociate(dns_rdataset_t *rdataset) {
5650         dns_db_t *db = rdataset->private1;
5651         dns_dbnode_t *node = rdataset->private2;
5652
5653         detachnode(db, &node);
5654 }
5655
5656 static isc_result_t
5657 rdataset_first(dns_rdataset_t *rdataset) {
5658         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
5659         unsigned int count;
5660
5661         count = raw[0] * 256 + raw[1];
5662         if (count == 0) {
5663                 rdataset->private5 = NULL;
5664                 return (ISC_R_NOMORE);
5665         }
5666
5667 #if DNS_RDATASET_FIXED
5668         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
5669                 raw += 2 + (4 * count);
5670         else
5671 #endif
5672                 raw += 2;
5673
5674         /*
5675          * The privateuint4 field is the number of rdata beyond the
5676          * cursor position, so we decrement the total count by one
5677          * before storing it.
5678          *
5679          * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
5680          * first record.  If DNS_RDATASETATTR_LOADORDER is set 'raw' points
5681          * to the first entry in the offset table.
5682          */
5683         count--;
5684         rdataset->privateuint4 = count;
5685         rdataset->private5 = raw;
5686
5687         return (ISC_R_SUCCESS);
5688 }
5689
5690 static isc_result_t
5691 rdataset_next(dns_rdataset_t *rdataset) {
5692         unsigned int count;
5693         unsigned int length;
5694         unsigned char *raw;     /* RDATASLAB */
5695
5696         count = rdataset->privateuint4;
5697         if (count == 0)
5698                 return (ISC_R_NOMORE);
5699         count--;
5700         rdataset->privateuint4 = count;
5701
5702         /*
5703          * Skip forward one record (length + 4) or one offset (4).
5704          */
5705         raw = rdataset->private5;
5706 #if DNS_RDATASET_FIXED
5707         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
5708 #endif
5709                 length = raw[0] * 256 + raw[1];
5710                 raw += length;
5711 #if DNS_RDATASET_FIXED
5712         }
5713         rdataset->private5 = raw + 4;           /* length(2) + order(2) */
5714 #else
5715         rdataset->private5 = raw + 2;           /* length(2) */
5716 #endif
5717
5718         return (ISC_R_SUCCESS);
5719 }
5720
5721 static void
5722 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
5723         unsigned char *raw = rdataset->private5;        /* RDATASLAB */
5724 #if DNS_RDATASET_FIXED
5725         unsigned int offset;
5726 #endif
5727         isc_region_t r;
5728
5729         REQUIRE(raw != NULL);
5730
5731         /*
5732          * Find the start of the record if not already in private5
5733          * then skip the length and order fields.
5734          */
5735 #if DNS_RDATASET_FIXED
5736         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
5737                 offset = (raw[0] << 24) + (raw[1] << 16) +
5738                          (raw[2] << 8) + raw[3];
5739                 raw = rdataset->private3;
5740                 raw += offset;
5741         }
5742 #endif
5743         r.length = raw[0] * 256 + raw[1];
5744
5745 #if DNS_RDATASET_FIXED
5746         raw += 4;
5747 #else
5748         raw += 2;
5749 #endif
5750         r.base = raw;
5751         dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
5752 }
5753
5754 static void
5755 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
5756         dns_db_t *db = source->private1;
5757         dns_dbnode_t *node = source->private2;
5758         dns_dbnode_t *cloned_node = NULL;
5759
5760         attachnode(db, node, &cloned_node);
5761         *target = *source;
5762
5763         /*
5764          * Reset iterator state.
5765          */
5766         target->privateuint4 = 0;
5767         target->private5 = NULL;
5768 }
5769
5770 static unsigned int
5771 rdataset_count(dns_rdataset_t *rdataset) {
5772         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
5773         unsigned int count;
5774
5775         count = raw[0] * 256 + raw[1];
5776
5777         return (count);
5778 }
5779
5780 static isc_result_t
5781 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
5782                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
5783 {
5784         dns_db_t *db = rdataset->private1;
5785         dns_dbnode_t *node = rdataset->private2;
5786         dns_dbnode_t *cloned_node;
5787         struct noqname *noqname = rdataset->private6;
5788
5789         cloned_node = NULL;
5790         attachnode(db, node, &cloned_node);
5791         nsec->methods = &rdataset_methods;
5792         nsec->rdclass = db->rdclass;
5793         nsec->type = dns_rdatatype_nsec;
5794         nsec->covers = 0;
5795         nsec->ttl = rdataset->ttl;
5796         nsec->trust = rdataset->trust;
5797         nsec->private1 = rdataset->private1;
5798         nsec->private2 = rdataset->private2;
5799         nsec->private3 = noqname->nsec;
5800         nsec->privateuint4 = 0;
5801         nsec->private5 = NULL;
5802         nsec->private6 = NULL;
5803
5804         cloned_node = NULL;
5805         attachnode(db, node, &cloned_node);
5806         nsecsig->methods = &rdataset_methods;
5807         nsecsig->rdclass = db->rdclass;
5808         nsecsig->type = dns_rdatatype_rrsig;
5809         nsecsig->covers = dns_rdatatype_nsec;
5810         nsecsig->ttl = rdataset->ttl;
5811         nsecsig->trust = rdataset->trust;
5812         nsecsig->private1 = rdataset->private1;
5813         nsecsig->private2 = rdataset->private2;
5814         nsecsig->private3 = noqname->nsecsig;
5815         nsecsig->privateuint4 = 0;
5816         nsecsig->private5 = NULL;
5817         nsec->private6 = NULL;
5818
5819         dns_name_clone(&noqname->name, name);
5820
5821         return (ISC_R_SUCCESS);
5822 }
5823
5824 /*
5825  * Rdataset Iterator Methods
5826  */
5827
5828 static void
5829 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
5830         rbtdb_rdatasetiter_t *rbtiterator;
5831
5832         rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
5833
5834         if (rbtiterator->common.version != NULL)
5835                 closeversion(rbtiterator->common.db,
5836                              &rbtiterator->common.version, ISC_FALSE);
5837         detachnode(rbtiterator->common.db, &rbtiterator->common.node);
5838         isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
5839                     sizeof(*rbtiterator));
5840
5841         *iteratorp = NULL;
5842 }
5843
5844 static isc_result_t
5845 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
5846         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
5847         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
5848         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
5849         rbtdb_version_t *rbtversion = rbtiterator->common.version;
5850         rdatasetheader_t *header, *top_next;
5851         rbtdb_serial_t serial;
5852         isc_stdtime_t now;
5853
5854         if (IS_CACHE(rbtdb)) {
5855                 serial = 1;
5856                 now = rbtiterator->common.now;
5857         } else {
5858                 serial = rbtversion->serial;
5859                 now = 0;
5860         }
5861
5862         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5863                   isc_rwlocktype_read);
5864
5865         for (header = rbtnode->data; header != NULL; header = top_next) {
5866                 top_next = header->next;
5867                 do {
5868                         if (header->serial <= serial && !IGNORE(header)) {
5869                                 /*
5870                                  * Is this a "this rdataset doesn't exist"
5871                                  * record?  Or is it too old in the cache?
5872                                  *
5873                                  * Note: unlike everywhere else, we
5874                                  * check for now > header->ttl instead
5875                                  * of now >= header->ttl.  This allows
5876                                  * ANY and RRSIG queries for 0 TTL
5877                                  * rdatasets to work.
5878                                  */
5879                                 if (NONEXISTENT(header) ||
5880                                     (now != 0 && now > header->ttl))
5881                                         header = NULL;
5882                                 break;
5883                         } else
5884                                 header = header->down;
5885                 } while (header != NULL);
5886                 if (header != NULL)
5887                         break;
5888         }
5889
5890         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5891                     isc_rwlocktype_read);
5892
5893         rbtiterator->current = header;
5894
5895         if (header == NULL)
5896                 return (ISC_R_NOMORE);
5897
5898         return (ISC_R_SUCCESS);
5899 }
5900
/*
 * Advance the rdataset iterator to the next usable rdataset of its node.
 *
 * Starting after the current header on the top-level list, each entry
 * whose type differs from both the current type and its "negative"
 * counterpart has its 'down' chain searched for the first header that
 * is visible (serial not newer than the iterator's serial and not
 * marked IGNORE).  That header is accepted unless it is flagged
 * NONEXISTENT or, for a cache, its TTL is already in the past.
 *
 * Returns ISC_R_NOMORE if the iterator has no current header or no
 * further usable header exists, ISC_R_SUCCESS otherwise.  'current' is
 * updated in either case once the search has run.
 */
static isc_result_t
rdatasetiter_next(dns_rdatasetiter_t *iterator) {
        rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
        dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
        dns_rbtnode_t *rbtnode = rbtiterator->common.node;
        rbtdb_version_t *rbtversion = rbtiterator->common.version;
        rdatasetheader_t *header, *top_next;
        rbtdb_serial_t serial;
        isc_stdtime_t now;
        rbtdb_rdatatype_t type, negtype;
        dns_rdatatype_t rdtype, covers;

        header = rbtiterator->current;
        if (header == NULL)
                return (ISC_R_NOMORE);

        /*
         * A cache is searched at serial 1 with TTL checking against the
         * iterator's 'now'; a zone is searched at the iterator's version
         * serial with TTL checking disabled (now == 0).
         */
        if (IS_CACHE(rbtdb)) {
                serial = 1;
                now = rbtiterator->common.now;
        } else {
                serial = rbtversion->serial;
                now = 0;
        }

        NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
                  isc_rwlocktype_read);

        /*
         * Work out the counterpart of the current type: when the base
         * type is 0 the counterpart is built from the extended (covered)
         * type, otherwise it is the value with base 0 and the current
         * base type as its extension.  Entries of either type are
         * skipped by the loop below.
         */
        type = header->type;
        rdtype = RBTDB_RDATATYPE_BASE(header->type);
        if (rdtype == 0) {
                covers = RBTDB_RDATATYPE_EXT(header->type);
                negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
        } else
                negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
        for (header = header->next; header != NULL; header = top_next) {
                /* Remember the next top-level entry; 'header' is reused
                 * to walk the down chain. */
                top_next = header->next;
                /*
                 * If not walking back up the down list.
                 */
                if (header->type != type && header->type != negtype) {
                        do {
                                if (header->serial <= serial &&
                                    !IGNORE(header)) {
                                        /*
                                         * Is this a "this rdataset doesn't
                                         * exist" record?
                                         *
                                         * Note: unlike everywhere else, we
                                         * check for now > header->ttl instead
                                         * of now >= header->ttl.  This allows
                                         * ANY and RRSIG queries for 0 TTL
                                         * rdatasets to work.
                                         */
                                        if ((header->attributes &
                                             RDATASET_ATTR_NONEXISTENT) != 0 ||
                                            (now != 0 && now > header->ttl))
                                                header = NULL;
                                        break;
                                } else
                                        header = header->down;
                        } while (header != NULL);
                        /* A non-NULL header here is the one we want. */
                        if (header != NULL)
                                break;
                }
        }

        NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
                    isc_rwlocktype_read);

        rbtiterator->current = header;

        if (header == NULL)
                return (ISC_R_NOMORE);

        return (ISC_R_SUCCESS);
}
5977
5978 static void
5979 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
5980         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
5981         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
5982         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
5983         rdatasetheader_t *header;
5984
5985         header = rbtiterator->current;
5986         REQUIRE(header != NULL);
5987
5988         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5989                   isc_rwlocktype_read);
5990
5991         bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
5992                       rdataset);
5993
5994         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5995                     isc_rwlocktype_read);
5996 }
5997
5998
5999 /*
6000  * Database Iterator Methods
6001  */
6002
6003 static inline void
6004 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
6005         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
6006         dns_rbtnode_t *node = rbtdbiter->node;
6007
6008         if (node == NULL)
6009                 return;
6010
6011         INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
6012         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
6013         new_reference(rbtdb, node);
6014         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
6015 }
6016
6017 static inline void
6018 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
6019         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
6020         dns_rbtnode_t *node = rbtdbiter->node;
6021         nodelock_t *lock;
6022
6023         if (node == NULL)
6024                 return;
6025
6026         lock = &rbtdb->node_locks[node->locknum].lock;
6027         NODE_LOCK(lock, isc_rwlocktype_read);
6028         decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
6029                             rbtdbiter->tree_locked);
6030         NODE_UNLOCK(lock, isc_rwlocktype_read);
6031
6032         rbtdbiter->node = NULL;
6033 }
6034
/*
 * Drop the references held on the nodes queued in the iterator's
 * 'deletions' array and empty the queue.
 *
 * Nothing is done when the queue is empty.  Otherwise the tree lock is
 * taken in write mode for the duration of the flush: a read lock held
 * by the iterator is released first and re-acquired afterwards; if the
 * iterator held no tree lock on entry it holds none on return.
 */
static void
flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
        dns_rbtnode_t *node;
        dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
        isc_boolean_t was_read_locked = ISC_FALSE;
        nodelock_t *lock;
        int i;

        if (rbtdbiter->delete != 0) {
                /*
                 * Note that "%d nodes of %d in tree" can report things
                 * like "flush_deletions: 59 nodes of 41 in tree".  This
                 * means that some nodes appear on the deletions list more
                 * than once.  Only the last occurrence will actually be
                 * deleted.
                 */
                isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
                              DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
                              "flush_deletions: %d nodes of %d in tree",
                              rbtdbiter->delete,
                              dns_rbt_nodecount(rbtdb->tree));

                /*
                 * The read lock is released and the write lock acquired
                 * as two separate steps.
                 */
                if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
                        RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
                        was_read_locked = ISC_TRUE;
                }
                RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
                rbtdbiter->tree_locked = isc_rwlocktype_write;

                /*
                 * Release the reference recorded for each queued node,
                 * telling decrement_reference() that the tree is write
                 * locked.
                 */
                for (i = 0; i < rbtdbiter->delete; i++) {
                        node = rbtdbiter->deletions[i];
                        lock = &rbtdb->node_locks[node->locknum].lock;

                        NODE_LOCK(lock, isc_rwlocktype_read);
                        decrement_reference(rbtdb, node, 0,
                                            isc_rwlocktype_read,
                                            rbtdbiter->tree_locked);
                        NODE_UNLOCK(lock, isc_rwlocktype_read);
                }

                rbtdbiter->delete = 0;

                /* Restore the tree lock state found on entry. */
                RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
                if (was_read_locked) {
                        RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
                        rbtdbiter->tree_locked = isc_rwlocktype_read;

                } else {
                        rbtdbiter->tree_locked = isc_rwlocktype_none;
                }
        }
}
6086
6087 static inline void
6088 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
6089         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
6090
6091         REQUIRE(rbtdbiter->paused);
6092         REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
6093
6094         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6095         rbtdbiter->tree_locked = isc_rwlocktype_read;
6096
6097         rbtdbiter->paused = ISC_FALSE;
6098 }
6099
6100 static void
6101 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
6102         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
6103         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
6104         dns_db_t *db = NULL;
6105
6106         if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
6107                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6108                 rbtdbiter->tree_locked = isc_rwlocktype_none;
6109         } else
6110                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
6111
6112         dereference_iter_node(rbtdbiter);
6113
6114         flush_deletions(rbtdbiter);
6115
6116         dns_db_attach(rbtdbiter->common.db, &db);
6117         dns_db_detach(&rbtdbiter->common.db);
6118
6119         dns_rbtnodechain_reset(&rbtdbiter->chain);
6120         isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
6121         dns_db_detach(&db);
6122
6123         *iteratorp = NULL;
6124 }
6125
6126 static isc_result_t
6127 dbiterator_first(dns_dbiterator_t *iterator) {
6128         isc_result_t result;
6129         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6130         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
6131         dns_name_t *name, *origin;
6132
6133         if (rbtdbiter->result != ISC_R_SUCCESS &&
6134             rbtdbiter->result != ISC_R_NOMORE)
6135                 return (rbtdbiter->result);
6136
6137         if (rbtdbiter->paused)
6138                 resume_iteration(rbtdbiter);
6139
6140         dereference_iter_node(rbtdbiter);
6141
6142         name = dns_fixedname_name(&rbtdbiter->name);
6143         origin = dns_fixedname_name(&rbtdbiter->origin);
6144         dns_rbtnodechain_reset(&rbtdbiter->chain);
6145
6146         result = dns_rbtnodechain_first(&rbtdbiter->chain, rbtdb->tree, name,
6147                                         origin);
6148
6149         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
6150                 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
6151                                                   NULL, &rbtdbiter->node);
6152                 if (result == ISC_R_SUCCESS) {
6153                         rbtdbiter->new_origin = ISC_TRUE;
6154                         reference_iter_node(rbtdbiter);
6155                 }
6156         } else {
6157                 INSIST(result == ISC_R_NOTFOUND);
6158                 result = ISC_R_NOMORE; /* The tree is empty. */
6159         }
6160
6161         rbtdbiter->result = result;
6162
6163         return (result);
6164 }
6165
6166 static isc_result_t
6167 dbiterator_last(dns_dbiterator_t *iterator) {
6168         isc_result_t result;
6169         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6170         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
6171         dns_name_t *name, *origin;
6172
6173         if (rbtdbiter->result != ISC_R_SUCCESS &&
6174             rbtdbiter->result != ISC_R_NOMORE)
6175                 return (rbtdbiter->result);
6176
6177         if (rbtdbiter->paused)
6178                 resume_iteration(rbtdbiter);
6179
6180         dereference_iter_node(rbtdbiter);
6181
6182         name = dns_fixedname_name(&rbtdbiter->name);
6183         origin = dns_fixedname_name(&rbtdbiter->origin);
6184         dns_rbtnodechain_reset(&rbtdbiter->chain);
6185
6186         result = dns_rbtnodechain_last(&rbtdbiter->chain, rbtdb->tree, name,
6187                                        origin);
6188         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
6189                 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
6190                                                   NULL, &rbtdbiter->node);
6191                 if (result == ISC_R_SUCCESS) {
6192                         rbtdbiter->new_origin = ISC_TRUE;
6193                         reference_iter_node(rbtdbiter);
6194                 }
6195         } else {
6196                 INSIST(result == ISC_R_NOTFOUND);
6197                 result = ISC_R_NOMORE; /* The tree is empty. */
6198         }
6199
6200         rbtdbiter->result = result;
6201
6202         return (result);
6203 }
6204
6205 static isc_result_t
6206 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
6207         isc_result_t result;
6208         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6209         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
6210         dns_name_t *iname, *origin;
6211
6212         if (rbtdbiter->result != ISC_R_SUCCESS &&
6213             rbtdbiter->result != ISC_R_NOMORE)
6214                 return (rbtdbiter->result);
6215
6216         if (rbtdbiter->paused)
6217                 resume_iteration(rbtdbiter);
6218
6219         dereference_iter_node(rbtdbiter);
6220
6221         iname = dns_fixedname_name(&rbtdbiter->name);
6222         origin = dns_fixedname_name(&rbtdbiter->origin);
6223         dns_rbtnodechain_reset(&rbtdbiter->chain);
6224
6225         result = dns_rbt_findnode(rbtdb->tree, name, NULL, &rbtdbiter->node,
6226                                   &rbtdbiter->chain, DNS_RBTFIND_EMPTYDATA,
6227                                   NULL, NULL);
6228         if (result == ISC_R_SUCCESS) {
6229                 result = dns_rbtnodechain_current(&rbtdbiter->chain, iname,
6230                                                   origin, NULL);
6231                 if (result == ISC_R_SUCCESS) {
6232                         rbtdbiter->new_origin = ISC_TRUE;
6233                         reference_iter_node(rbtdbiter);
6234                 }
6235
6236         } else if (result == DNS_R_PARTIALMATCH)
6237                 result = ISC_R_NOTFOUND;
6238
6239         rbtdbiter->result = result;
6240
6241         return (result);
6242 }
6243
6244 static isc_result_t
6245 dbiterator_prev(dns_dbiterator_t *iterator) {
6246         isc_result_t result;
6247         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6248         dns_name_t *name, *origin;
6249
6250         REQUIRE(rbtdbiter->node != NULL);
6251
6252         if (rbtdbiter->result != ISC_R_SUCCESS)
6253                 return (rbtdbiter->result);
6254
6255         if (rbtdbiter->paused)
6256                 resume_iteration(rbtdbiter);
6257
6258         name = dns_fixedname_name(&rbtdbiter->name);
6259         origin = dns_fixedname_name(&rbtdbiter->origin);
6260         result = dns_rbtnodechain_prev(&rbtdbiter->chain, name, origin);
6261
6262         dereference_iter_node(rbtdbiter);
6263
6264         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
6265                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
6266                 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
6267                                                   NULL, &rbtdbiter->node);
6268         }
6269
6270         if (result == ISC_R_SUCCESS)
6271                 reference_iter_node(rbtdbiter);
6272
6273         rbtdbiter->result = result;
6274
6275         return (result);
6276 }
6277
6278 static isc_result_t
6279 dbiterator_next(dns_dbiterator_t *iterator) {
6280         isc_result_t result;
6281         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6282         dns_name_t *name, *origin;
6283
6284         REQUIRE(rbtdbiter->node != NULL);
6285
6286         if (rbtdbiter->result != ISC_R_SUCCESS)
6287                 return (rbtdbiter->result);
6288
6289         if (rbtdbiter->paused)
6290                 resume_iteration(rbtdbiter);
6291
6292         name = dns_fixedname_name(&rbtdbiter->name);
6293         origin = dns_fixedname_name(&rbtdbiter->origin);
6294         result = dns_rbtnodechain_next(&rbtdbiter->chain, name, origin);
6295
6296         dereference_iter_node(rbtdbiter);
6297
6298         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
6299                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
6300                 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
6301                                                   NULL, &rbtdbiter->node);
6302         }
6303         if (result == ISC_R_SUCCESS)
6304                 reference_iter_node(rbtdbiter);
6305
6306         rbtdbiter->result = result;
6307
6308         return (result);
6309 }
6310
/*
 * Return the node the database iterator is positioned on.
 *
 * The iterator's recorded result must be ISC_R_SUCCESS and it must have
 * a current node.  Iteration is resumed first if it was paused.
 *
 * If 'name' is non-NULL it receives the node's name: the stored node
 * name alone when the iterator uses relative names, otherwise the node
 * name concatenated with the stored origin.  A concatenation failure is
 * returned immediately, before any reference is taken.
 *
 * A new reference on the node is taken for the caller and the node is
 * stored in '*nodep'.
 *
 * Returns DNS_R_NEWORIGIN when a name was requested, relative names are
 * in use and the iterator has just moved to a new origin; otherwise
 * ISC_R_SUCCESS.
 *
 * When the iterator is in cleaning mode and the result is
 * ISC_R_SUCCESS, the node is additionally expired and, if it has no
 * 'down' pointer, queued (with an extra reference) for
 * flush_deletions().
 */
static isc_result_t
dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
                   dns_name_t *name)
{
        dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
        rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
        dns_rbtnode_t *node = rbtdbiter->node;
        isc_result_t result;
        dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
        dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);

        REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
        REQUIRE(rbtdbiter->node != NULL);

        if (rbtdbiter->paused)
                resume_iteration(rbtdbiter);

        if (name != NULL) {
                /* Relative names are returned without the origin. */
                if (rbtdbiter->common.relative_names)
                        origin = NULL;
                result = dns_name_concatenate(nodename, origin, name, NULL);
                if (result != ISC_R_SUCCESS)
                        return (result);
                if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
                        result = DNS_R_NEWORIGIN;
        } else
                result = ISC_R_SUCCESS;

        /* This reference is the one handed to the caller via *nodep. */
        NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
        new_reference(rbtdb, node);
        NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);

        *nodep = rbtdbiter->node;

        if (iterator->cleaning && result == ISC_R_SUCCESS) {
                isc_result_t expire_result;

                /*
                 * If the deletion array is full, flush it before trying
                 * to expire the current node.  The current node can't be
                 * fully deleted while the iteration cursor is still on it.
                 */
                if (rbtdbiter->delete == DELETION_BATCH_MAX)
                        flush_deletions(rbtdbiter);

                expire_result = expirenode(iterator->db, *nodep, 0);

                /*
                 * expirenode() currently always returns success.
                 */
                if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
                        unsigned int refs;

                        /*
                         * Queue the node and take the extra reference
                         * that flush_deletions() will release later.
                         */
                        rbtdbiter->deletions[rbtdbiter->delete++] = node;
                        NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
                        dns_rbtnode_refincrement(node, &refs);
                        INSIST(refs != 0);
                        NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
                }
        }

        return (result);
}
6374
6375 static isc_result_t
6376 dbiterator_pause(dns_dbiterator_t *iterator) {
6377         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
6378         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6379
6380         if (rbtdbiter->result != ISC_R_SUCCESS &&
6381             rbtdbiter->result != ISC_R_NOMORE)
6382                 return (rbtdbiter->result);
6383
6384         if (rbtdbiter->paused)
6385                 return (ISC_R_SUCCESS);
6386
6387         rbtdbiter->paused = ISC_TRUE;
6388
6389         if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
6390                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
6391                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6392                 rbtdbiter->tree_locked = isc_rwlocktype_none;
6393         }
6394
6395         flush_deletions(rbtdbiter);
6396
6397         return (ISC_R_SUCCESS);
6398 }
6399
6400 static isc_result_t
6401 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
6402         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6403         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
6404
6405         if (rbtdbiter->result != ISC_R_SUCCESS)
6406                 return (rbtdbiter->result);
6407
6408         return (dns_name_copy(origin, name, NULL));
6409 }
6410
6411 /*%
6412  * Additional cache routines.
6413  */
6414 static isc_result_t
6415 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
6416                        dns_rdatatype_t qtype, dns_acache_t *acache,
6417                        dns_zone_t **zonep, dns_db_t **dbp,
6418                        dns_dbversion_t **versionp, dns_dbnode_t **nodep,
6419                        dns_name_t *fname, dns_message_t *msg,
6420                        isc_stdtime_t now)
6421 {
6422         dns_rbtdb_t *rbtdb = rdataset->private1;
6423         dns_rbtnode_t *rbtnode = rdataset->private2;
6424         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
6425         unsigned int current_count = rdataset->privateuint4;
6426         unsigned int count;
6427         rdatasetheader_t *header;
6428         nodelock_t *nodelock;
6429         unsigned int total_count;
6430         acachectl_t *acarray;
6431         dns_acacheentry_t *entry;
6432         isc_result_t result;
6433
6434         UNUSED(qtype); /* we do not use this value at least for now */
6435         UNUSED(acache);
6436
6437         header = (struct rdatasetheader *)(raw - sizeof(*header));
6438
6439         total_count = raw[0] * 256 + raw[1];
6440         INSIST(total_count > current_count);
6441         count = total_count - current_count - 1;
6442
6443         acarray = NULL;
6444
6445         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
6446         NODE_LOCK(nodelock, isc_rwlocktype_read);
6447
6448         switch (type) {
6449         case dns_rdatasetadditional_fromauth:
6450                 acarray = header->additional_auth;
6451                 break;
6452         case dns_rdatasetadditional_fromcache:
6453                 acarray = NULL;
6454                 break;
6455         case dns_rdatasetadditional_fromglue:
6456                 acarray = header->additional_glue;
6457                 break;
6458         default:
6459                 INSIST(0);
6460         }
6461
6462         if (acarray == NULL) {
6463                 if (type != dns_rdatasetadditional_fromcache)
6464                         dns_acache_countquerymiss(acache);
6465                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
6466                 return (ISC_R_NOTFOUND);
6467         }
6468
6469         if (acarray[count].entry == NULL) {
6470                 dns_acache_countquerymiss(acache);
6471                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
6472                 return (ISC_R_NOTFOUND);
6473         }
6474
6475         entry = NULL;
6476         dns_acache_attachentry(acarray[count].entry, &entry);
6477
6478         NODE_UNLOCK(nodelock, isc_rwlocktype_read);
6479
6480         result = dns_acache_getentry(entry, zonep, dbp, versionp,
6481                                      nodep, fname, msg, now);
6482
6483         dns_acache_detachentry(&entry);
6484
6485         return (result);
6486 }
6487
6488 static void
6489 acache_callback(dns_acacheentry_t *entry, void **arg) {
6490         dns_rbtdb_t *rbtdb;
6491         dns_rbtnode_t *rbtnode;
6492         nodelock_t *nodelock;
6493         acachectl_t *acarray = NULL;
6494         acache_cbarg_t *cbarg;
6495         unsigned int count;
6496
6497         REQUIRE(arg != NULL);
6498         cbarg = *arg;
6499
6500         /*
6501          * The caller must hold the entry lock.
6502          */
6503
6504         rbtdb = (dns_rbtdb_t *)cbarg->db;
6505         rbtnode = (dns_rbtnode_t *)cbarg->node;
6506
6507         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
6508         NODE_LOCK(nodelock, isc_rwlocktype_write);
6509
6510         switch (cbarg->type) {
6511         case dns_rdatasetadditional_fromauth:
6512                 acarray = cbarg->header->additional_auth;
6513                 break;
6514         case dns_rdatasetadditional_fromglue:
6515                 acarray = cbarg->header->additional_glue;
6516                 break;
6517         default:
6518                 INSIST(0);
6519         }
6520
6521         count = cbarg->count;
6522         if (acarray != NULL && acarray[count].entry == entry) {
6523                 acarray[count].entry = NULL;
6524                 INSIST(acarray[count].cbarg == cbarg);
6525                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
6526                 acarray[count].cbarg = NULL;
6527         } else
6528                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
6529
6530         dns_acache_detachentry(&entry);
6531
6532         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
6533
6534         dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
6535         dns_db_detach((dns_db_t **)(void*)&rbtdb);
6536
6537         *arg = NULL;
6538 }
6539
6540 static void
6541 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
6542                       acache_cbarg_t **cbargp)
6543 {
6544         acache_cbarg_t *cbarg;
6545
6546         REQUIRE(mctx != NULL);
6547         REQUIRE(entry != NULL);
6548         REQUIRE(cbargp != NULL && *cbargp != NULL);
6549
6550         cbarg = *cbargp;
6551
6552         dns_acache_cancelentry(entry);
6553         dns_db_detachnode(cbarg->db, &cbarg->node);
6554         dns_db_detach(&cbarg->db);
6555
6556         isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
6557
6558         *cbargp = NULL;
6559 }
6560
6561 static isc_result_t
6562 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
6563                        dns_rdatatype_t qtype, dns_acache_t *acache,
6564                        dns_zone_t *zone, dns_db_t *db,
6565                        dns_dbversion_t *version, dns_dbnode_t *node,
6566                        dns_name_t *fname)
6567 {
6568         dns_rbtdb_t *rbtdb = rdataset->private1;
6569         dns_rbtnode_t *rbtnode = rdataset->private2;
6570         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
6571         unsigned int current_count = rdataset->privateuint4;
6572         rdatasetheader_t *header;
6573         unsigned int total_count, count;
6574         nodelock_t *nodelock;
6575         isc_result_t result;
6576         acachectl_t *acarray;
6577         dns_acacheentry_t *newentry, *oldentry = NULL;
6578         acache_cbarg_t *newcbarg, *oldcbarg = NULL;
6579
6580         UNUSED(qtype);
6581
6582         if (type == dns_rdatasetadditional_fromcache)
6583                 return (ISC_R_SUCCESS);
6584
6585         header = (struct rdatasetheader *)(raw - sizeof(*header));
6586
6587         total_count = raw[0] * 256 + raw[1];
6588         INSIST(total_count > current_count);
6589         count = total_count - current_count - 1; /* should be private data */
6590
6591         newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
6592         if (newcbarg == NULL)
6593                 return (ISC_R_NOMEMORY);
6594         newcbarg->type = type;
6595         newcbarg->count = count;
6596         newcbarg->header = header;
6597         newcbarg->db = NULL;
6598         dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
6599         newcbarg->node = NULL;
6600         dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
6601                           &newcbarg->node);
6602         newentry = NULL;
6603         result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
6604                                         acache_callback, newcbarg, &newentry);
6605         if (result != ISC_R_SUCCESS)
6606                 goto fail;
6607         /* Set cache data in the new entry. */
6608         result = dns_acache_setentry(acache, newentry, zone, db,
6609                                      version, node, fname);
6610         if (result != ISC_R_SUCCESS)
6611                 goto fail;
6612
6613         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
6614         NODE_LOCK(nodelock, isc_rwlocktype_write);
6615
6616         acarray = NULL;
6617         switch (type) {
6618         case dns_rdatasetadditional_fromauth:
6619                 acarray = header->additional_auth;
6620                 break;
6621         case dns_rdatasetadditional_fromglue:
6622                 acarray = header->additional_glue;
6623                 break;
6624         default:
6625                 INSIST(0);
6626         }
6627
6628         if (acarray == NULL) {
6629                 unsigned int i;
6630
6631                 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
6632                                       sizeof(acachectl_t));
6633
6634                 if (acarray == NULL) {
6635                         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
6636                         goto fail;
6637                 }
6638
6639                 for (i = 0; i < total_count; i++) {
6640                         acarray[i].entry = NULL;
6641                         acarray[i].cbarg = NULL;
6642                 }
6643         }
6644         switch (type) {
6645         case dns_rdatasetadditional_fromauth:
6646                 header->additional_auth = acarray;
6647                 break;
6648         case dns_rdatasetadditional_fromglue:
6649                 header->additional_glue = acarray;
6650                 break;
6651         default:
6652                 INSIST(0);
6653         }
6654
6655         if (acarray[count].entry != NULL) {
6656                 /*
6657                  * Swap the entry.  Delay cleaning-up the old entry since
6658                  * it would require a node lock.
6659                  */
6660                 oldentry = acarray[count].entry;
6661                 INSIST(acarray[count].cbarg != NULL);
6662                 oldcbarg = acarray[count].cbarg;
6663         }
6664         acarray[count].entry = newentry;
6665         acarray[count].cbarg = newcbarg;
6666
6667         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
6668
6669         if (oldentry != NULL) {
6670                 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
6671                 dns_acache_detachentry(&oldentry);
6672         }
6673
6674         return (ISC_R_SUCCESS);
6675
6676   fail:
6677         if (newcbarg != NULL) {
6678                 if (newentry != NULL) {
6679                         acache_cancelentry(rbtdb->common.mctx, newentry,
6680                                            &newcbarg);
6681                         dns_acache_detachentry(&newentry);
6682                 } else {
6683                         dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
6684                         dns_db_detach(&newcbarg->db);
6685                         isc_mem_put(rbtdb->common.mctx, newcbarg,
6686                             sizeof(*newcbarg));
6687                 }
6688         }
6689
6690         return (result);
6691 }
6692
6693 static isc_result_t
6694 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
6695                        dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
6696 {
6697         dns_rbtdb_t *rbtdb = rdataset->private1;
6698         dns_rbtnode_t *rbtnode = rdataset->private2;
6699         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
6700         unsigned int current_count = rdataset->privateuint4;
6701         rdatasetheader_t *header;
6702         nodelock_t *nodelock;
6703         unsigned int total_count, count;
6704         acachectl_t *acarray;
6705         dns_acacheentry_t *entry;
6706         acache_cbarg_t *cbarg;
6707
6708         UNUSED(qtype);          /* we do not use this value at least for now */
6709         UNUSED(acache);
6710
6711         if (type == dns_rdatasetadditional_fromcache)
6712                 return (ISC_R_SUCCESS);
6713
6714         header = (struct rdatasetheader *)(raw - sizeof(*header));
6715
6716         total_count = raw[0] * 256 + raw[1];
6717         INSIST(total_count > current_count);
6718         count = total_count - current_count - 1;
6719
6720         acarray = NULL;
6721         entry = NULL;
6722
6723         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
6724         NODE_LOCK(nodelock, isc_rwlocktype_write);
6725
6726         switch (type) {
6727         case dns_rdatasetadditional_fromauth:
6728                 acarray = header->additional_auth;
6729                 break;
6730         case dns_rdatasetadditional_fromglue:
6731                 acarray = header->additional_glue;
6732                 break;
6733         default:
6734                 INSIST(0);
6735         }
6736
6737         if (acarray == NULL) {
6738                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
6739                 return (ISC_R_NOTFOUND);
6740         }
6741
6742         entry = acarray[count].entry;
6743         if (entry == NULL) {
6744                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
6745                 return (ISC_R_NOTFOUND);
6746         }
6747
6748         acarray[count].entry = NULL;
6749         cbarg = acarray[count].cbarg;
6750         acarray[count].cbarg = NULL;
6751
6752         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
6753
6754         if (entry != NULL) {
6755                 acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
6756                 dns_acache_detachentry(&entry);
6757         }
6758
6759         return (ISC_R_SUCCESS);
6760 }