]> CyberLeo.Net >> Repos - FreeBSD/releng/8.2.git/blob - contrib/bind9/lib/dns/rbtdb.c
Copy stable/8 to releng/8.2 in preparation for FreeBSD-8.2 release.
[FreeBSD/releng/8.2.git] / contrib / bind9 / lib / dns / rbtdb.c
1 /*
2  * Copyright (C) 2004-2010  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id: rbtdb.c,v 1.270.12.16.10.6 2010/11/16 07:46:23 marka Exp $ */
19
20 /*! \file */
21
22 /*
23  * Principal Author: Bob Halley
24  */
25
26 #include <config.h>
27
28 /* #define inline */
29
30 #include <isc/event.h>
31 #include <isc/heap.h>
32 #include <isc/mem.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
41 #include <isc/task.h>
42 #include <isc/time.h>
43 #include <isc/util.h>
44
45 #include <dns/acache.h>
46 #include <dns/db.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
50 #include <dns/lib.h>
51 #include <dns/log.h>
52 #include <dns/masterdump.h>
53 #include <dns/nsec.h>
54 #include <dns/nsec3.h>
55 #include <dns/rbt.h>
56 #include <dns/rdata.h>
57 #include <dns/rdataset.h>
58 #include <dns/rdatasetiter.h>
59 #include <dns/rdataslab.h>
60 #include <dns/rdatastruct.h>
61 #include <dns/result.h>
62 #include <dns/stats.h>
63 #include <dns/view.h>
64 #include <dns/zone.h>
65 #include <dns/zonekey.h>
66
67 #ifdef DNS_RBTDB_VERSION64
68 #include "rbtdb64.h"
69 #else
70 #include "rbtdb.h"
71 #endif
72
/*
 * Implementation magic: the 64-bit serial build is tagged 'RBD8', the
 * default build 'RBD4'.
 */
#ifndef DNS_RBTDB_VERSION64
#define RBTDB_MAGIC     ISC_MAGIC('R', 'B', 'D', '4')
#else
#define RBTDB_MAGIC     ISC_MAGIC('R', 'B', 'D', '8')
#endif

/*%
 * "impmagic" does not sit in the first four bytes of the struct, which
 * rules out ISC_MAGIC_VALID; the field is compared explicitly instead.
 */
#define VALID_RBTDB(rbtdb) \
        ((rbtdb) != NULL && (rbtdb)->common.impmagic == RBTDB_MAGIC)
85
86 #ifdef DNS_RBTDB_VERSION64
87 typedef isc_uint64_t                    rbtdb_serial_t;
88 /*%
89  * Make casting easier in symbolic debuggers by using different names
90  * for the 64 bit version.
91  */
92 #define dns_rbtdb_t dns_rbtdb64_t
93 #define rdatasetheader_t rdatasetheader64_t
94 #define rbtdb_version_t rbtdb_version64_t
95 #else
96 typedef isc_uint32_t                    rbtdb_serial_t;
97 #endif
98
99 typedef isc_uint32_t                    rbtdb_rdatatype_t;
100
/*
 * An rbtdb_rdatatype_t packs two 16-bit rdata types into 32 bits:
 * the "base" type in the low half and the "ext" type in the high half.
 *
 * RBTDB_RDATATYPE_BASE(type)   extracts the low 16 bits.
 * RBTDB_RDATATYPE_EXT(type)    extracts the high 16 bits.
 * RBTDB_RDATATYPE_VALUE(b, e)  builds the packed value from base 'b' and
 *                              ext 'e'.
 *
 * Both operands of RBTDB_RDATATYPE_VALUE() are widened to the unsigned
 * 32-bit rbtdb_rdatatype_t *before* shifting.  Shifting a 16-bit value that
 * has been promoted to (signed) int left by 16 is undefined behaviour when
 * the value is 0x8000 or larger.
 */
#define RBTDB_RDATATYPE_BASE(type)      ((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)       ((dns_rdatatype_t)((type) >> 16))
#define RBTDB_RDATATYPE_VALUE(b, e) \
        ((rbtdb_rdatatype_t)((((rbtdb_rdatatype_t)(e)) << 16) | \
                             ((rbtdb_rdatatype_t)(b))))

/* Pre-built packed values with rrsig as base and the named type as ext. */
#define RBTDB_RDATATYPE_SIGNSEC \
                RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
#define RBTDB_RDATATYPE_SIGNSEC3 \
                RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
#define RBTDB_RDATATYPE_SIGNS \
                RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
#define RBTDB_RDATATYPE_SIGCNAME \
                RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
#define RBTDB_RDATATYPE_SIGDNAME \
                RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
/* Base 0 with ext "any". */
#define RBTDB_RDATATYPE_NCACHEANY \
                RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
117
118 /*
119  * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
120  * Using rwlock is effective with regard to lookup performance only when
121  * it is implemented in an efficient way.
122  * Otherwise, it is generally wise to stick to the simple locking since rwlock
123  * would require more memory or can even make lookups slower due to its own
124  * overhead (when it internally calls mutex locks).
125  */
/* The DB lock is an rwlock only when ISC_RWLOCK_USEATOMIC is defined. */
#if defined(ISC_RWLOCK_USEATOMIC)
#define DNS_RBTDB_USERWLOCK 1
#else
#define DNS_RBTDB_USERWLOCK 0
#endif

/*
 * DB lock wrappers.  The lock-type argument 't' is ignored by the mutex
 * variants.
 */
#if !DNS_RBTDB_USERWLOCK
#define RBTDB_INITLOCK(l)       isc_mutex_init(l)
#define RBTDB_DESTROYLOCK(l)    DESTROYLOCK(l)
#define RBTDB_LOCK(l, t)        LOCK(l)
#define RBTDB_UNLOCK(l, t)      UNLOCK(l)
#else
#define RBTDB_INITLOCK(l)       isc_rwlock_init((l), 0, 0)
#define RBTDB_DESTROYLOCK(l)    isc_rwlock_destroy(l)
#define RBTDB_LOCK(l, t)        RWLOCK((l), (t))
#define RBTDB_UNLOCK(l, t)      RWUNLOCK((l), (t))
#endif
143
144 /*
145  * Since node locking is sensitive to both performance and memory footprint,
146  * we need some trick here.  If we have both high-performance rwlock and
147  * high performance and small-memory reference counters, we use rwlock for
148  * node lock and isc_refcount for node references.  In this case, we don't have
149  * to protect the access to the counters by locks.
150  * Otherwise, we simply use ordinary mutex lock for node locking, and use
151  * simple integers as reference counters which is protected by the lock.
152  * In most cases, we can simply use wrapper macros such as NODE_LOCK and
153  * NODE_UNLOCK.  In some other cases, however, we need to protect reference
154  * counters first and then protect other parts of a node as read-only data.
155  * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
156  * provided for these special cases.  When we can use the efficient backend
157  * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
158  * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
159  * section including the access to the reference counter.
160  * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
161  * section is also protected by NODE_STRONGLOCK().
162  */
163 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
164 typedef isc_rwlock_t nodelock_t;
165
166 #define NODE_INITLOCK(l)        isc_rwlock_init((l), 0, 0)
167 #define NODE_DESTROYLOCK(l)     isc_rwlock_destroy(l)
168 #define NODE_LOCK(l, t)         RWLOCK((l), (t))
169 #define NODE_UNLOCK(l, t)       RWUNLOCK((l), (t))
170 #define NODE_TRYUPGRADE(l)      isc_rwlock_tryupgrade(l)
171
172 #define NODE_STRONGLOCK(l)      ((void)0)
173 #define NODE_STRONGUNLOCK(l)    ((void)0)
174 #define NODE_WEAKLOCK(l, t)     NODE_LOCK(l, t)
175 #define NODE_WEAKUNLOCK(l, t)   NODE_UNLOCK(l, t)
176 #define NODE_WEAKDOWNGRADE(l)   isc_rwlock_downgrade(l)
177 #else
178 typedef isc_mutex_t nodelock_t;
179
180 #define NODE_INITLOCK(l)        isc_mutex_init(l)
181 #define NODE_DESTROYLOCK(l)     DESTROYLOCK(l)
182 #define NODE_LOCK(l, t)         LOCK(l)
183 #define NODE_UNLOCK(l, t)       UNLOCK(l)
184 #define NODE_TRYUPGRADE(l)      ISC_R_SUCCESS
185
186 #define NODE_STRONGLOCK(l)      LOCK(l)
187 #define NODE_STRONGUNLOCK(l)    UNLOCK(l)
188 #define NODE_WEAKLOCK(l, t)     ((void)0)
189 #define NODE_WEAKUNLOCK(l, t)   ((void)0)
190 #define NODE_WEAKDOWNGRADE(l)   ((void)0)
191 #endif
192
/*%
 * Whether LRU updates are rate-limited to avoid possible thread
 * contention.  Performance measurement has shown the cost to be marginal,
 * so the default is 0 both with and without threads.
 */
#if !defined(DNS_RBTDB_LIMITLRUUPDATE)
#define DNS_RBTDB_LIMITLRUUPDATE 0
#endif

/*
 * Clients with a virtual time of up to 5 minutes in the past are allowed
 * to see records that would otherwise have expired.
 */
#define RBTDB_VIRTUAL 300
207
208 struct noqname {
209         dns_name_t      name;
210         void *          neg;
211         void *          negsig;
212         dns_rdatatype_t type;
213 };
214
215 typedef struct acachectl acachectl_t;
216
217 typedef struct rdatasetheader {
218         /*%
219          * Locked by the owning node's lock.
220          */
221         rbtdb_serial_t                  serial;
222         dns_ttl_t                       rdh_ttl;
223         rbtdb_rdatatype_t               type;
224         isc_uint16_t                    attributes;
225         dns_trust_t                     trust;
226         struct noqname                  *noqname;
227         struct noqname                  *closest;
228         /*%<
229          * We don't use the LIST macros, because the LIST structure has
230          * both head and tail pointers, and is doubly linked.
231          */
232
233         struct rdatasetheader           *next;
234         /*%<
235          * If this is the top header for an rdataset, 'next' points
236          * to the top header for the next rdataset (i.e., the next type).
237          * Otherwise, it points up to the header whose down pointer points
238          * at this header.
239          */
240
241         struct rdatasetheader           *down;
242         /*%<
243          * Points to the header for the next older version of
244          * this rdataset.
245          */
246
247         isc_uint32_t                    count;
248         /*%<
249          * Monotonously increased every time this rdataset is bound so that
250          * it is used as the base of the starting point in DNS responses
251          * when the "cyclic" rrset-order is required.  Since the ordering
252          * should not be so crucial, no lock is set for the counter for
253          * performance reasons.
254          */
255
256         acachectl_t                     *additional_auth;
257         acachectl_t                     *additional_glue;
258
259         dns_rbtnode_t                   *node;
260         isc_stdtime_t                   last_used;
261         ISC_LINK(struct rdatasetheader) link;
262
263         unsigned int                    heap_index;
264         /*%<
265          * Used for TTL-based cache cleaning.
266          */
267         isc_stdtime_t                   resign;
268 } rdatasetheader_t;
269
270 typedef ISC_LIST(rdatasetheader_t)      rdatasetheaderlist_t;
271 typedef ISC_LIST(dns_rbtnode_t)         rbtnodelist_t;
272
/* Bit flags stored in rdatasetheader_t 'attributes' (one bit each). */
#define RDATASET_ATTR_NONEXISTENT       (1 << 0)
#define RDATASET_ATTR_STALE             (1 << 1)
#define RDATASET_ATTR_IGNORE            (1 << 2)
#define RDATASET_ATTR_RETAIN            (1 << 3)
#define RDATASET_ATTR_NXDOMAIN          (1 << 4)
#define RDATASET_ATTR_RESIGN            (1 << 5)
#define RDATASET_ATTR_STATCOUNT         (1 << 6)
#define RDATASET_ATTR_OPTOUT            (1 << 7)
281
282 typedef struct acache_cbarg {
283         dns_rdatasetadditional_t        type;
284         unsigned int                    count;
285         dns_db_t                        *db;
286         dns_dbnode_t                    *node;
287         rdatasetheader_t                *header;
288 } acache_cbarg_t;
289
290 struct acachectl {
291         dns_acacheentry_t               *entry;
292         acache_cbarg_t                  *cbarg;
293 };
294
295 /*
296  * XXX
297  * When the cache will pre-expire data (due to memory low or other
298  * situations) before the rdataset's TTL has expired, it MUST
299  * respect the RETAIN bit and not expire the data until its TTL is
300  * expired.
301  */
302
#undef IGNORE                   /* WIN32 winbase.h defines this. */

/* True when 'attr' is set in the header's attribute bits. */
#define HEADER_HASATTR(header, attr) \
        (((header)->attributes & (attr)) != 0)

/* Attribute predicates for an rdatasetheader_t pointer. */
#define EXISTS(header)          (!HEADER_HASATTR(header, RDATASET_ATTR_NONEXISTENT))
#define NONEXISTENT(header)     HEADER_HASATTR(header, RDATASET_ATTR_NONEXISTENT)
#define IGNORE(header)          HEADER_HASATTR(header, RDATASET_ATTR_IGNORE)
#define RETAIN(header)          HEADER_HASATTR(header, RDATASET_ATTR_RETAIN)
#define NXDOMAIN(header)        HEADER_HASATTR(header, RDATASET_ATTR_NXDOMAIN)
#define RESIGN(header)          HEADER_HASATTR(header, RDATASET_ATTR_RESIGN)
#define OPTOUT(header)          HEADER_HASATTR(header, RDATASET_ATTR_OPTOUT)

#define DEFAULT_NODE_LOCK_COUNT         7       /*%< Should be prime. */
321
/*%
 * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
 * Choosing the value is a tradeoff: too small and contention between
 * threads may get heavier; too large and the LRU purge algorithm won't
 * work well (entries tend to be purged prematurely).  The default should
 * suit most environments, and it can be overridden at compilation time
 * through DNS_RBTDB_CACHE_NODE_LOCK_COUNT.  The value must be larger than
 * 1 due to the assumption of overmem_purge().
 */
#if !defined(DNS_RBTDB_CACHE_NODE_LOCK_COUNT)
#define DEFAULT_CACHE_NODE_LOCK_COUNT   16
#elif DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
#error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
#else
#define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
#endif  /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
341
342 typedef struct {
343         nodelock_t                      lock;
344         /* Protected in the refcount routines. */
345         isc_refcount_t                  references;
346         /* Locked by lock. */
347         isc_boolean_t                   exiting;
348 } rbtdb_nodelock_t;
349
350 typedef struct rbtdb_changed {
351         dns_rbtnode_t *                 node;
352         isc_boolean_t                   dirty;
353         ISC_LINK(struct rbtdb_changed)  link;
354 } rbtdb_changed_t;
355
356 typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
357
/* Security status recorded in a version's 'secure' field. */
typedef enum {
        dns_db_insecure,        /* 0 */
        dns_db_partial,         /* 1 */
        dns_db_secure           /* 2 */
} dns_db_secure_t;
363
364 typedef struct rbtdb_version {
365         /* Not locked */
366         rbtdb_serial_t                  serial;
367         /*
368          * Protected in the refcount routines.
369          * XXXJT: should we change the lock policy based on the refcount
370          * performance?
371          */
372         isc_refcount_t                  references;
373         /* Locked by database lock. */
374         isc_boolean_t                   writer;
375         isc_boolean_t                   commit_ok;
376         rbtdb_changedlist_t             changed_list;
377         rdatasetheaderlist_t            resigned_list;
378         ISC_LINK(struct rbtdb_version)  link;
379         dns_db_secure_t                 secure;
380         isc_boolean_t                   havensec3;
381         /* NSEC3 parameters */
382         dns_hash_t                      hash;
383         isc_uint8_t                     flags;
384         isc_uint16_t                    iterations;
385         isc_uint8_t                     salt_length;
386         unsigned char                   salt[DNS_NSEC3_SALTSIZE];
387 } rbtdb_version_t;
388
389 typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;
390
391 typedef struct {
392         /* Unlocked. */
393         dns_db_t                        common;
394 #if DNS_RBTDB_USERWLOCK
395         isc_rwlock_t                    lock;
396 #else
397         isc_mutex_t                     lock;
398 #endif
399         isc_rwlock_t                    tree_lock;
400         unsigned int                    node_lock_count;
401         rbtdb_nodelock_t *              node_locks;
402         dns_rbtnode_t *                 origin_node;
403         dns_stats_t *                   rrsetstats; /* cache DB only */
404         /* Locked by lock. */
405         unsigned int                    active;
406         isc_refcount_t                  references;
407         unsigned int                    attributes;
408         rbtdb_serial_t                  current_serial;
409         rbtdb_serial_t                  least_serial;
410         rbtdb_serial_t                  next_serial;
411         rbtdb_version_t *               current_version;
412         rbtdb_version_t *               future_version;
413         rbtdb_versionlist_t             open_versions;
414         isc_task_t *                    task;
415         dns_dbnode_t                    *soanode;
416         dns_dbnode_t                    *nsnode;
417
418         /*
419          * This is a linked list used to implement the LRU cache.  There will
420          * be node_lock_count linked lists here.  Nodes in bucket 1 will be
421          * placed on the linked list rdatasets[1].
422          */
423         rdatasetheaderlist_t            *rdatasets;
424
425         /*%
426          * Temporary storage for stale cache nodes and dynamically deleted
427          * nodes that await being cleaned up.
428          */
429         rbtnodelist_t                   *deadnodes;
430
431         /*
432          * Heaps.  Each of these is used for TTL based expiry.
433          */
434         isc_heap_t                      **heaps;
435
436         /* Locked by tree_lock. */
437         dns_rbt_t *                     tree;
438         dns_rbt_t *                     nsec3;
439
440         /* Unlocked */
441         unsigned int                    quantum;
442 } dns_rbtdb_t;
443
444 #define RBTDB_ATTR_LOADED               0x01
445 #define RBTDB_ATTR_LOADING              0x02
446
447 /*%
448  * Search Context
449  */
450 typedef struct {
451         dns_rbtdb_t *           rbtdb;
452         rbtdb_version_t *       rbtversion;
453         rbtdb_serial_t          serial;
454         unsigned int            options;
455         dns_rbtnodechain_t      chain;
456         isc_boolean_t           copy_name;
457         isc_boolean_t           need_cleanup;
458         isc_boolean_t           wild;
459         dns_rbtnode_t *         zonecut;
460         rdatasetheader_t *      zonecut_rdataset;
461         rdatasetheader_t *      zonecut_sigrdataset;
462         dns_fixedname_t         zonecut_name;
463         isc_stdtime_t           now;
464 } rbtdb_search_t;
465
466 /*%
467  * Load Context
468  */
469 typedef struct {
470         dns_rbtdb_t *           rbtdb;
471         isc_stdtime_t           now;
472 } rbtdb_load_t;
473
474 static void rdataset_disassociate(dns_rdataset_t *rdataset);
475 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
476 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
477 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
478 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
479 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
480 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
481                                         dns_name_t *name,
482                                         dns_rdataset_t *neg,
483                                         dns_rdataset_t *negsig);
484 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
485                                         dns_name_t *name,
486                                         dns_rdataset_t *neg,
487                                         dns_rdataset_t *negsig);
488 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
489                                            dns_rdatasetadditional_t type,
490                                            dns_rdatatype_t qtype,
491                                            dns_acache_t *acache,
492                                            dns_zone_t **zonep,
493                                            dns_db_t **dbp,
494                                            dns_dbversion_t **versionp,
495                                            dns_dbnode_t **nodep,
496                                            dns_name_t *fname,
497                                            dns_message_t *msg,
498                                            isc_stdtime_t now);
499 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
500                                            dns_rdatasetadditional_t type,
501                                            dns_rdatatype_t qtype,
502                                            dns_acache_t *acache,
503                                            dns_zone_t *zone,
504                                            dns_db_t *db,
505                                            dns_dbversion_t *version,
506                                            dns_dbnode_t *node,
507                                            dns_name_t *fname);
508 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
509                                            dns_rdataset_t *rdataset,
510                                            dns_rdatasetadditional_t type,
511                                            dns_rdatatype_t qtype);
512 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
513                                               isc_stdtime_t now);
514 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
515                           isc_stdtime_t now);
516 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
517                           isc_boolean_t tree_locked);
518 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
519                           isc_stdtime_t now, isc_boolean_t tree_locked);
520 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
521                                   rdatasetheader_t *newheader);
522 static void prune_tree(isc_task_t *task, isc_event_t *event);
523 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
524 static void rdataset_expire(dns_rdataset_t *rdataset);
525
526 static dns_rdatasetmethods_t rdataset_methods = {
527         rdataset_disassociate,
528         rdataset_first,
529         rdataset_next,
530         rdataset_current,
531         rdataset_clone,
532         rdataset_count,
533         NULL,
534         rdataset_getnoqname,
535         NULL,
536         rdataset_getclosest,
537         rdataset_getadditional,
538         rdataset_setadditional,
539         rdataset_putadditional,
540         rdataset_settrust,
541         rdataset_expire
542 };
543
544 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
545 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
546 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
547 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
548                                  dns_rdataset_t *rdataset);
549
550 static dns_rdatasetitermethods_t rdatasetiter_methods = {
551         rdatasetiter_destroy,
552         rdatasetiter_first,
553         rdatasetiter_next,
554         rdatasetiter_current
555 };
556
557 typedef struct rbtdb_rdatasetiter {
558         dns_rdatasetiter_t              common;
559         rdatasetheader_t *              current;
560 } rbtdb_rdatasetiter_t;
561
562 static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
563 static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
564 static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
565 static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
566                                         dns_name_t *name);
567 static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
568 static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
569 static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
570                                            dns_dbnode_t **nodep,
571                                            dns_name_t *name);
572 static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
573 static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
574                                           dns_name_t *name);
575
576 static dns_dbiteratormethods_t dbiterator_methods = {
577         dbiterator_destroy,
578         dbiterator_first,
579         dbiterator_last,
580         dbiterator_seek,
581         dbiterator_prev,
582         dbiterator_next,
583         dbiterator_current,
584         dbiterator_pause,
585         dbiterator_origin
586 };
587
588 #define DELETION_BATCH_MAX 64
589
590 /*
591  * If 'paused' is ISC_TRUE, then the tree lock is not being held.
592  */
593 typedef struct rbtdb_dbiterator {
594         dns_dbiterator_t                common;
595         isc_boolean_t                   paused;
596         isc_boolean_t                   new_origin;
597         isc_rwlocktype_t                tree_locked;
598         isc_result_t                    result;
599         dns_fixedname_t                 name;
600         dns_fixedname_t                 origin;
601         dns_rbtnodechain_t              chain;
602         dns_rbtnodechain_t              nsec3chain;
603         dns_rbtnodechain_t              *current;
604         dns_rbtnode_t                   *node;
605         dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX];
606         int                             delete;
607         isc_boolean_t                   nsec3only;
608         isc_boolean_t                   nonsec3;
609 } rbtdb_dbiterator_t;
610
611
612 #define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
613 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
614
615 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
616                        isc_event_t *event);
617 static void overmem(dns_db_t *db, isc_boolean_t overmem);
618 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
619                                isc_boolean_t *nsec3createflag);
620
/*%
 * 'init_count' is used to initialize 'newheader->count', which in turn
 * determines where in the cycle rrset-order cyclic starts.  It is not
 * locked because simultaneous updates do not matter.
 *
 * Note:
 *      Both init_count and header->count can be ISC_UINT32_MAX.
 *      The count on the returned rdataset however can't be, as
 *      that value indicates that the database does not implement
 *      cyclic processing.
 */
static unsigned int init_count;
633
634 /*
635  * Locking
636  *
637  * If a routine is going to lock more than one lock in this module, then
638  * the locking must be done in the following order:
639  *
640  *      Tree Lock
641  *
642  *      Node Lock       (Only one from the set may be locked at one time by
643  *                       any caller)
644  *
645  *      Database Lock
646  *
647  * Failure to follow this hierarchy can result in deadlock.
648  */
649
650 /*
651  * Deleting Nodes
652  *
653  * For zone databases the node for the origin of the zone MUST NOT be deleted.
654  */
655
656
657 /*
658  * DB Routines
659  */
660
661 static void
662 attach(dns_db_t *source, dns_db_t **targetp) {
663         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
664
665         REQUIRE(VALID_RBTDB(rbtdb));
666
667         isc_refcount_increment(&rbtdb->references, NULL);
668
669         *targetp = source;
670 }
671
672 static void
673 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
674         dns_rbtdb_t *rbtdb = event->ev_arg;
675
676         UNUSED(task);
677
678         free_rbtdb(rbtdb, ISC_TRUE, event);
679 }
680
681 static void
682 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
683                   isc_boolean_t increment)
684 {
685         dns_rdatastatstype_t statattributes = 0;
686         dns_rdatastatstype_t base = 0;
687         dns_rdatastatstype_t type;
688
689         /* At the moment we count statistics only for cache DB */
690         INSIST(IS_CACHE(rbtdb));
691
692         if (NXDOMAIN(header))
693                 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
694         else if (RBTDB_RDATATYPE_BASE(header->type) == 0) {
695                 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
696                 base = RBTDB_RDATATYPE_EXT(header->type);
697         } else
698                 base = RBTDB_RDATATYPE_BASE(header->type);
699
700         type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
701         if (increment)
702                 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
703         else
704                 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
705 }
706
707 static void
708 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
709         int idx;
710         isc_heap_t *heap;
711         dns_ttl_t oldttl;
712
713         oldttl = header->rdh_ttl;
714         header->rdh_ttl = newttl;
715
716         if (!IS_CACHE(rbtdb))
717                 return;
718
719         /*
720          * It's possible the rbtdb is not a cache.  If this is the case,
721          * we will not have a heap, and we move on.  If we do, though,
722          * we might need to adjust things.
723          */
724         if (header->heap_index == 0 || newttl == oldttl)
725                 return;
726         idx = header->node->locknum;
727         if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
728             return;
729         heap = rbtdb->heaps[idx];
730
731         if (newttl < oldttl)
732                 isc_heap_increased(heap, header->heap_index);
733         else
734                 isc_heap_decreased(heap, header->heap_index);
735 }
736
737 /*%
738  * These functions allow the heap code to rank the priority of each
739  * element.  It returns ISC_TRUE if v1 happens "sooner" than v2.
740  */
741 static isc_boolean_t
742 ttl_sooner(void *v1, void *v2) {
743         rdatasetheader_t *h1 = v1;
744         rdatasetheader_t *h2 = v2;
745
746         if (h1->rdh_ttl < h2->rdh_ttl)
747                 return (ISC_TRUE);
748         return (ISC_FALSE);
749 }
750
751 static isc_boolean_t
752 resign_sooner(void *v1, void *v2) {
753         rdatasetheader_t *h1 = v1;
754         rdatasetheader_t *h2 = v2;
755
756         if (h1->resign < h2->resign)
757                 return (ISC_TRUE);
758         return (ISC_FALSE);
759 }
760
761 /*%
762  * This function sets the heap index into the header.
763  */
764 static void
765 set_index(void *what, unsigned int index) {
766         rdatasetheader_t *h = what;
767
768         h->heap_index = index;
769 }
770
771 /*%
772  * Work out how many nodes can be deleted in the time between two
773  * requests to the nameserver.  Smooth the resulting number and use it
774  * as a estimate for the number of nodes to be deleted in the next
775  * iteration.
776  */
777 static unsigned int
778 adjust_quantum(unsigned int old, isc_time_t *start) {
779         unsigned int pps = dns_pps;     /* packets per second */
780         unsigned int interval;
781         isc_uint64_t usecs;
782         isc_time_t end;
783         unsigned int new;
784
785         if (pps < 100)
786                 pps = 100;
787         isc_time_now(&end);
788
789         interval = 1000000 / pps;       /* interval in usec */
790         if (interval == 0)
791                 interval = 1;
792         usecs = isc_time_microdiff(&end, start);
793         if (usecs == 0) {
794                 /*
795                  * We were unable to measure the amount of time taken.
796                  * Double the nodes deleted next time.
797                  */
798                 old *= 2;
799                 if (old > 1000)
800                         old = 1000;
801                 return (old);
802         }
803         new = old * interval;
804         new /= (unsigned int)usecs;
805         if (new == 0)
806                 new = 1;
807         else if (new > 1000)
808                 new = 1000;
809
810         /* Smooth */
811         new = (new + old * 3) / 4;
812
813         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
814                       ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
815
816         return (new);
817 }
818
/*
 * Tear down 'rbtdb' and release everything it owns.
 *
 * 'log' selects whether a "done free_rbtdb" debug message is written once
 * both trees are gone.  'event' is NULL on the initial call; a non-NULL
 * 'event' is reused if the work has to be handed to rbtdb->task again and
 * is freed once both trees have been destroyed.
 *
 * The main tree and the nsec3 tree are destroyed with dns_rbt_destroy2(),
 * which is given rbtdb->quantum.  When that call returns ISC_R_QUOTA this
 * function posts a DNS_EVENT_FREESTORAGE event (action
 * free_rbtdb_callback, argument 'rbtdb') to rbtdb->task and returns with
 * the database only partly freed.
 * NOTE(review): presumably free_rbtdb_callback() re-enters this function
 * with that event -- it is defined outside this section; confirm.
 *
 * Requires that there is no future version, and that either a current
 * version exists or the list of open versions is empty.
 */
819 static void
820 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
821         unsigned int i;
822         isc_ondestroy_t ondest;
823         isc_result_t result;
824         char buf[DNS_NAME_FORMATSIZE];
825         isc_time_t start;
826
        /*
         * NOTE(review): the meaning of the (isc_boolean_t)-1 argument is
         * defined by overmem(), which is not visible here -- confirm.
         */
827         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
828                 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
829
830         REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
831         REQUIRE(rbtdb->future_version == NULL);
832
        /*
         * Drop the reference held on the current version (it must be the
         * last one), take it off the open-versions list and free it.
         */
833         if (rbtdb->current_version != NULL) {
834                 unsigned int refs;
835
836                 isc_refcount_decrement(&rbtdb->current_version->references,
837                                        &refs);
838                 INSIST(refs == 0);
839                 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
840                 isc_refcount_destroy(&rbtdb->current_version->references);
841                 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
842                             sizeof(rbtdb_version_t));
843         }
844
845         /*
846          * We assume the number of remaining dead nodes is reasonably small;
847          * the overhead of unlinking all nodes here should be negligible.
848          */
849         for (i = 0; i < rbtdb->node_lock_count; i++) {
850                 dns_rbtnode_t *node;
851
852                 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
853                 while (node != NULL) {
854                         ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
855                         node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
856                 }
857         }
858
        /*
         * Initial call (no event yet): pick the starting quantum, 100 when
         * a task is available to continue the work on and 0 otherwise.
         * NOTE(review): a quantum of 0 presumably means "no limit" to
         * dns_rbt_destroy2() -- confirm against dns/rbt.h.
         */
859         if (event == NULL)
860                 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
861  again:
862         if (rbtdb->tree != NULL) {
863                 isc_time_now(&start);
864                 result = dns_rbt_destroy2(&rbtdb->tree, rbtdb->quantum);
865                 if (result == ISC_R_QUOTA) {
866                         INSIST(rbtdb->task != NULL);
867                         if (rbtdb->quantum != 0)
868                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
869                                                                 &start);
870                         if (event == NULL)
871                                 event = isc_event_allocate(rbtdb->common.mctx,
872                                                            NULL,
873                                                          DNS_EVENT_FREESTORAGE,
874                                                            free_rbtdb_callback,
875                                                            rbtdb,
876                                                            sizeof(isc_event_t));
                        /*
                         * No event could be allocated, so the work cannot
                         * be deferred: keep destroying right away.
                         */
877                         if (event == NULL)
878                                 goto again;
879                         isc_task_send(rbtdb->task, &event);
880                         return;
881                 }
882                 INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
883         }
884
        /* Same procedure for the nsec3 tree. */
885         if (rbtdb->nsec3 != NULL) {
886                 isc_time_now(&start);
887                 result = dns_rbt_destroy2(&rbtdb->nsec3, rbtdb->quantum);
888                 if (result == ISC_R_QUOTA) {
889                         INSIST(rbtdb->task != NULL);
890                         if (rbtdb->quantum != 0)
891                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
892                                                                 &start);
893                         if (event == NULL)
894                                 event = isc_event_allocate(rbtdb->common.mctx,
895                                                            NULL,
896                                                          DNS_EVENT_FREESTORAGE,
897                                                            free_rbtdb_callback,
898                                                            rbtdb,
899                                                            sizeof(isc_event_t));
900                         if (event == NULL)
901                                 goto again;
902                         isc_task_send(rbtdb->task, &event);
903                         return;
904                 }
905                 INSIST(result == ISC_R_SUCCESS && rbtdb->nsec3 == NULL);
906         }
907
        /* Both trees are gone; the continuation event is no longer needed. */
908         if (event != NULL)
909                 isc_event_free(&event);
910         if (log) {
911                 if (dns_name_dynamic(&rbtdb->common.origin))
912                         dns_name_format(&rbtdb->common.origin, buf,
913                                         sizeof(buf));
914                 else
915                         strcpy(buf, "<UNKNOWN>");
916                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
917                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
918                               "done free_rbtdb(%s)", buf);
919         }
920         if (dns_name_dynamic(&rbtdb->common.origin))
921                 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
922         for (i = 0; i < rbtdb->node_lock_count; i++) {
923                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
924                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
925         }
926
927         /*
928          * Clean up LRU / re-signing order lists.
929          */
930         if (rbtdb->rdatasets != NULL) {
931                 for (i = 0; i < rbtdb->node_lock_count; i++)
932                         INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
933                 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
934                             rbtdb->node_lock_count *
935                             sizeof(rdatasetheaderlist_t));
936         }
937         /*
938          * Clean up dead node buckets.
939          */
940         if (rbtdb->deadnodes != NULL) {
941                 for (i = 0; i < rbtdb->node_lock_count; i++)
942                         INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
943                 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
944                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
945         }
946         /*
947          * Clean up heap objects.
948          */
949         if (rbtdb->heaps != NULL) {
950                 for (i = 0; i < rbtdb->node_lock_count; i++)
951                         isc_heap_destroy(&rbtdb->heaps[i]);
952                 isc_mem_put(rbtdb->common.mctx, rbtdb->heaps,
953                             rbtdb->node_lock_count *
954                             sizeof(isc_heap_t *));
955         }
956
957         if (rbtdb->rrsetstats != NULL)
958                 dns_stats_detach(&rbtdb->rrsetstats);
959
960         isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
961                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
962         isc_rwlock_destroy(&rbtdb->tree_lock);
963         isc_refcount_destroy(&rbtdb->references);
964         if (rbtdb->task != NULL)
965                 isc_task_detach(&rbtdb->task);
966
967         RBTDB_DESTROYLOCK(&rbtdb->lock);
968         rbtdb->common.magic = 0;
969         rbtdb->common.impmagic = 0;
        /*
         * Copy the ondestroy state out first: the structure itself is
         * released by the next call, and the notification is sent from
         * the local copy afterwards.
         */
970         ondest = rbtdb->common.ondest;
971         isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
972         isc_ondestroy_notify(&ondest, rbtdb);
973 }
974
975 static inline void
976 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
977         isc_boolean_t want_free = ISC_FALSE;
978         unsigned int i;
979         unsigned int inactive = 0;
980
981         /* XXX check for open versions here */
982
983         if (rbtdb->soanode != NULL)
984                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
985         if (rbtdb->nsnode != NULL)
986                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
987
988         /*
989          * Even though there are no external direct references, there still
990          * may be nodes in use.
991          */
992         for (i = 0; i < rbtdb->node_lock_count; i++) {
993                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
994                 rbtdb->node_locks[i].exiting = ISC_TRUE;
995                 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
996                 if (isc_refcount_current(&rbtdb->node_locks[i].references)
997                     == 0) {
998                         inactive++;
999                 }
1000         }
1001
1002         if (inactive != 0) {
1003                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1004                 rbtdb->active -= inactive;
1005                 if (rbtdb->active == 0)
1006                         want_free = ISC_TRUE;
1007                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1008                 if (want_free) {
1009                         char buf[DNS_NAME_FORMATSIZE];
1010                         if (dns_name_dynamic(&rbtdb->common.origin))
1011                                 dns_name_format(&rbtdb->common.origin, buf,
1012                                                 sizeof(buf));
1013                         else
1014                                 strcpy(buf, "<UNKNOWN>");
1015                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1016                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1017                                       "calling free_rbtdb(%s)", buf);
1018                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
1019                 }
1020         }
1021 }
1022
1023 static void
1024 detach(dns_db_t **dbp) {
1025         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1026         unsigned int refs;
1027
1028         REQUIRE(VALID_RBTDB(rbtdb));
1029
1030         isc_refcount_decrement(&rbtdb->references, &refs);
1031
1032         if (refs == 0)
1033                 maybe_free_rbtdb(rbtdb);
1034
1035         *dbp = NULL;
1036 }
1037
1038 static void
1039 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1040         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1041         rbtdb_version_t *version;
1042         unsigned int refs;
1043
1044         REQUIRE(VALID_RBTDB(rbtdb));
1045
1046         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1047         version = rbtdb->current_version;
1048         isc_refcount_increment(&version->references, &refs);
1049         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1050
1051         *versionp = (dns_dbversion_t *)version;
1052 }
1053
1054 static inline rbtdb_version_t *
1055 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1056                  unsigned int references, isc_boolean_t writer)
1057 {
1058         isc_result_t result;
1059         rbtdb_version_t *version;
1060
1061         version = isc_mem_get(mctx, sizeof(*version));
1062         if (version == NULL)
1063                 return (NULL);
1064         version->serial = serial;
1065         result = isc_refcount_init(&version->references, references);
1066         if (result != ISC_R_SUCCESS) {
1067                 isc_mem_put(mctx, version, sizeof(*version));
1068                 return (NULL);
1069         }
1070         version->writer = writer;
1071         version->commit_ok = ISC_FALSE;
1072         ISC_LIST_INIT(version->changed_list);
1073         ISC_LIST_INIT(version->resigned_list);
1074         ISC_LINK_INIT(version, link);
1075
1076         return (version);
1077 }
1078
1079 static isc_result_t
1080 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1081         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1082         rbtdb_version_t *version;
1083
1084         REQUIRE(VALID_RBTDB(rbtdb));
1085         REQUIRE(versionp != NULL && *versionp == NULL);
1086         REQUIRE(rbtdb->future_version == NULL);
1087
1088         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1089         RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
1090         version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1091                                    ISC_TRUE);
1092         if (version != NULL) {
1093                 version->commit_ok = ISC_TRUE;
1094                 version->secure = rbtdb->current_version->secure;
1095                 version->havensec3 = rbtdb->current_version->havensec3;
1096                 if (version->havensec3) {
1097                         version->flags = rbtdb->current_version->flags;
1098                         version->iterations =
1099                                 rbtdb->current_version->iterations;
1100                         version->hash = rbtdb->current_version->hash;
1101                         version->salt_length =
1102                                 rbtdb->current_version->salt_length;
1103                         memcpy(version->salt, rbtdb->current_version->salt,
1104                                version->salt_length);
1105                 } else {
1106                         version->flags = 0;
1107                         version->iterations = 0;
1108                         version->hash = 0;
1109                         version->salt_length = 0;
1110                         memset(version->salt, 0, sizeof(version->salt));
1111                 }
1112                 rbtdb->next_serial++;
1113                 rbtdb->future_version = version;
1114         }
1115         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1116
1117         if (version == NULL)
1118                 return (ISC_R_NOMEMORY);
1119
1120         *versionp = version;
1121
1122         return (ISC_R_SUCCESS);
1123 }
1124
1125 static void
1126 attachversion(dns_db_t *db, dns_dbversion_t *source,
1127               dns_dbversion_t **targetp)
1128 {
1129         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1130         rbtdb_version_t *rbtversion = source;
1131         unsigned int refs;
1132
1133         REQUIRE(VALID_RBTDB(rbtdb));
1134
1135         isc_refcount_increment(&rbtversion->references, &refs);
1136         INSIST(refs > 1);
1137
1138         *targetp = rbtversion;
1139 }
1140
1141 static rbtdb_changed_t *
1142 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1143             dns_rbtnode_t *node)
1144 {
1145         rbtdb_changed_t *changed;
1146         unsigned int refs;
1147
1148         /*
1149          * Caller must be holding the node lock if its reference must be
1150          * protected by the lock.
1151          */
1152
1153         changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1154
1155         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1156
1157         REQUIRE(version->writer);
1158
1159         if (changed != NULL) {
1160                 dns_rbtnode_refincrement(node, &refs);
1161                 INSIST(refs != 0);
1162                 changed->node = node;
1163                 changed->dirty = ISC_FALSE;
1164                 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1165         } else
1166                 version->commit_ok = ISC_FALSE;
1167
1168         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1169
1170         return (changed);
1171 }
1172
1173 static void
1174 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1175                  acachectl_t *array)
1176 {
1177         unsigned int count;
1178         unsigned int i;
1179         unsigned char *raw;     /* RDATASLAB */
1180
1181         /*
1182          * The caller must be holding the corresponding node lock.
1183          */
1184
1185         if (array == NULL)
1186                 return;
1187
1188         raw = (unsigned char *)header + sizeof(*header);
1189         count = raw[0] * 256 + raw[1];
1190
1191         /*
1192          * Sanity check: since an additional cache entry has a reference to
1193          * the original DB node (in the callback arg), there should be no
1194          * acache entries when the node can be freed.
1195          */
1196         for (i = 0; i < count; i++)
1197                 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1198
1199         isc_mem_put(mctx, array, count * sizeof(acachectl_t));
1200 }
1201
1202 static inline void
1203 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1204
1205         if (dns_name_dynamic(&(*noqname)->name))
1206                 dns_name_free(&(*noqname)->name, mctx);
1207         if ((*noqname)->neg != NULL)
1208                 isc_mem_put(mctx, (*noqname)->neg,
1209                             dns_rdataslab_size((*noqname)->neg, 0));
1210         if ((*noqname)->negsig != NULL)
1211                 isc_mem_put(mctx, (*noqname)->negsig,
1212                             dns_rdataslab_size((*noqname)->negsig, 0));
1213         isc_mem_put(mctx, *noqname, sizeof(**noqname));
1214         *noqname = NULL;
1215 }
1216
1217 static inline void
1218 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1219 {
1220         ISC_LINK_INIT(h, link);
1221         h->heap_index = 0;
1222
1223 #if TRACE_HEADER
1224         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1225                 fprintf(stderr, "initialized header: %p\n", h);
1226 #else
1227         UNUSED(rbtdb);
1228 #endif
1229 }
1230
1231 static inline rdatasetheader_t *
1232 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1233 {
1234         rdatasetheader_t *h;
1235
1236         h = isc_mem_get(mctx, sizeof(*h));
1237         if (h == NULL)
1238                 return (NULL);
1239
1240 #if TRACE_HEADER
1241         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1242                 fprintf(stderr, "allocated header: %p\n", h);
1243 #endif
1244         init_rdataset(rbtdb, h);
1245         return (h);
1246 }
1247
1248 static inline void
1249 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1250 {
1251         unsigned int size;
1252         int idx;
1253
1254         if (EXISTS(rdataset) &&
1255             (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1256                 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1257         }
1258
1259         idx = rdataset->node->locknum;
1260         if (ISC_LINK_LINKED(rdataset, link)) {
1261                 INSIST(IS_CACHE(rbtdb));
1262                 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1263         }
1264         if (rdataset->heap_index != 0)
1265                 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1266         rdataset->heap_index = 0;
1267
1268         if (rdataset->noqname != NULL)
1269                 free_noqname(mctx, &rdataset->noqname);
1270         if (rdataset->closest != NULL)
1271                 free_noqname(mctx, &rdataset->closest);
1272
1273         free_acachearray(mctx, rdataset, rdataset->additional_auth);
1274         free_acachearray(mctx, rdataset, rdataset->additional_glue);
1275
1276         if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1277                 size = sizeof(*rdataset);
1278         else
1279                 size = dns_rdataslab_size((unsigned char *)rdataset,
1280                                           sizeof(*rdataset));
1281         isc_mem_put(mctx, rdataset, size);
1282 }
1283
1284 static inline void
1285 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1286         rdatasetheader_t *header, *dcurrent;
1287         isc_boolean_t make_dirty = ISC_FALSE;
1288
1289         /*
1290          * Caller must hold the node lock.
1291          */
1292
1293         /*
1294          * We set the IGNORE attribute on rdatasets with serial number
1295          * 'serial'.  When the reference count goes to zero, these rdatasets
1296          * will be cleaned up; until that time, they will be ignored.
1297          */
1298         for (header = node->data; header != NULL; header = header->next) {
1299                 if (header->serial == serial) {
1300                         header->attributes |= RDATASET_ATTR_IGNORE;
1301                         make_dirty = ISC_TRUE;
1302                 }
1303                 for (dcurrent = header->down;
1304                      dcurrent != NULL;
1305                      dcurrent = dcurrent->down) {
1306                         if (dcurrent->serial == serial) {
1307                                 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1308                                 make_dirty = ISC_TRUE;
1309                         }
1310                 }
1311         }
1312         if (make_dirty)
1313                 node->dirty = 1;
1314 }
1315
1316 static inline void
1317 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1318 {
1319         rdatasetheader_t *d, *down_next;
1320
1321         for (d = top->down; d != NULL; d = down_next) {
1322                 down_next = d->down;
1323                 free_rdataset(rbtdb, mctx, d);
1324         }
1325         top->down = NULL;
1326 }
1327
1328 static inline void
1329 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1330         rdatasetheader_t *current, *top_prev, *top_next;
1331         isc_mem_t *mctx = rbtdb->common.mctx;
1332
1333         /*
1334          * Caller must be holding the node lock.
1335          */
1336
1337         top_prev = NULL;
1338         for (current = node->data; current != NULL; current = top_next) {
1339                 top_next = current->next;
1340                 clean_stale_headers(rbtdb, mctx, current);
1341                 /*
1342                  * If current is nonexistent or stale, we can clean it up.
1343                  */
1344                 if ((current->attributes &
1345                      (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1346                         if (top_prev != NULL)
1347                                 top_prev->next = current->next;
1348                         else
1349                                 node->data = current->next;
1350                         free_rdataset(rbtdb, mctx, current);
1351                 } else
1352                         top_prev = current;
1353         }
1354         node->dirty = 0;
1355 }
1356
/*
 * Prune the rdataset headers attached to zone node 'node'.
 *
 * Top-level headers are linked through 'next'; the entries below each of
 * them are linked through 'down'.  For every top-level header this:
 *   - frees each down entry that has IGNORE set or carries the same
 *     serial as the nearest retained entry above it;
 *   - removes the top-level header itself if it has IGNORE set,
 *     promoting its first down entry to the top level when there is one;
 *   - frees the first down entry whose serial is below 'least_serial'
 *     together with everything beneath it;
 *   - frees the top-level header if it is NONEXISTENT and has no down
 *     entries left.
 * node->dirty is cleared unless some header still has a down chain.
 *
 * 'least_serial' must be non-zero.
 */
1357 static inline void
1358 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1359                 rbtdb_serial_t least_serial)
1360 {
1361         rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1362         rdatasetheader_t *top_prev, *top_next;
1363         isc_mem_t *mctx = rbtdb->common.mctx;
1364         isc_boolean_t still_dirty = ISC_FALSE;
1365
1366         /*
1367          * Caller must be holding the node lock.
1368          */
1369         REQUIRE(least_serial != 0);
1370
        /* top_prev tracks the last top-level header left in the list. */
1371         top_prev = NULL;
1372         for (current = node->data; current != NULL; current = top_next) {
1373                 top_next = current->next;
1374
1375                 /*
1376                  * First, we clean up any instances of multiple rdatasets
1377                  * with the same serial number, or that have the IGNORE
1378                  * attribute.
1379                  */
1380                 dparent = current;
1381                 for (dcurrent = current->down;
1382                      dcurrent != NULL;
1383                      dcurrent = down_next) {
1384                         down_next = dcurrent->down;
1385                         INSIST(dcurrent->serial <= dparent->serial);
1386                         if (dcurrent->serial == dparent->serial ||
1387                             IGNORE(dcurrent)) {
                                /*
                                 * Splice dcurrent out: the entry below it
                                 * gets its 'next' pointed at dparent, and
                                 * dparent's 'down' now skips dcurrent.
                                 */
1388                                 if (down_next != NULL)
1389                                         down_next->next = dparent;
1390                                 dparent->down = down_next;
1391                                 free_rdataset(rbtdb, mctx, dcurrent);
1392                         } else
1393                                 dparent = dcurrent;
1394                 }
1395
1396                 /*
1397                  * We've now eliminated all IGNORE datasets with the possible
1398                  * exception of current, which we now check.
1399                  */
1400                 if (IGNORE(current)) {
1401                         down_next = current->down;
1402                         if (down_next == NULL) {
1403                                 if (top_prev != NULL)
1404                                         top_prev->next = current->next;
1405                                 else
1406                                         node->data = current->next;
1407                                 free_rdataset(rbtdb, mctx, current);
1408                                 /*
1409                                  * current no longer exists, so we can
1410                                  * just continue with the loop.
1411                                  */
1412                                 continue;
1413                         } else {
1414                                 /*
1415                                  * Pull up current->down, making it the new
1416                                  * current.
1417                                  */
1418                                 if (top_prev != NULL)
1419                                         top_prev->next = down_next;
1420                                 else
1421                                         node->data = down_next;
1422                                 down_next->next = top_next;
1423                                 free_rdataset(rbtdb, mctx, current);
1424                                 current = down_next;
1425                         }
1426                 }
1427
1428                 /*
1429                  * We now try to find the first down node less than the
1430                  * least serial.
1431                  */
1432                 dparent = current;
1433                 for (dcurrent = current->down;
1434                      dcurrent != NULL;
1435                      dcurrent = down_next) {
1436                         down_next = dcurrent->down;
1437                         if (dcurrent->serial < least_serial)
1438                                 break;
1439                         dparent = dcurrent;
1440                 }
1441
1442                 /*
1443                  * If there is a such an rdataset, delete it and any older
1444                  * versions.
1445                  */
1446                 if (dcurrent != NULL) {
1447                         do {
1448                                 down_next = dcurrent->down;
1449                                 INSIST(dcurrent->serial <= least_serial);
1450                                 free_rdataset(rbtdb, mctx, dcurrent);
1451                                 dcurrent = down_next;
1452                         } while (dcurrent != NULL);
                        /* dparent is the last entry kept; end the chain there. */
1453                         dparent->down = NULL;
1454                 }
1455
1456                 /*
1457                  * Note.  The serial number of 'current' might be less than
1458                  * least_serial too, but we cannot delete it because it is
1459                  * the most recent version, unless it is a NONEXISTENT
1460                  * rdataset.
1461                  */
1462                 if (current->down != NULL) {
1463                         still_dirty = ISC_TRUE;
1464                         top_prev = current;
1465                 } else {
1466                         /*
1467                          * If this is a NONEXISTENT rdataset, we can delete it.
1468                          */
1469                         if (NONEXISTENT(current)) {
1470                                 if (top_prev != NULL)
1471                                         top_prev->next = current->next;
1472                                 else
1473                                         node->data = current->next;
1474                                 free_rdataset(rbtdb, mctx, current);
1475                         } else
1476                                 top_prev = current;
1477                 }
1478         }
1479         if (!still_dirty)
1480                 node->dirty = 0;
1481 }
1482
1483 /*%
1484  * Clean up dead nodes.  These are nodes which have no references, and
1485  * have no data.  They are dead but we could not or chose not to delete
1486  * them when we deleted all the data at that node because we did not want
1487  * to wait for the tree write lock.
1488  *
1489  * The caller must hold a tree write lock and bucketnum'th node (write) lock.
1490  */
1491 static void
1492 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1493         dns_rbtnode_t *node;
1494         isc_result_t result;
1495         int count = 10;         /* XXXJT: should be adjustable */
1496
1497         node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1498         while (node != NULL && count > 0) {
1499                 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1500
1501                 /*
1502                  * Since we're holding a tree write lock, it should be
1503                  * impossible for this node to be referenced by others.
1504                  */
1505                 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1506                        node->data == NULL);
1507
1508                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1509                 if (node->nsec3)
1510                         result = dns_rbt_deletenode(rbtdb->nsec3, node,
1511                                                     ISC_FALSE);
1512                 else
1513                         result = dns_rbt_deletenode(rbtdb->tree, node,
1514                                                     ISC_FALSE);
1515                 if (result != ISC_R_SUCCESS)
1516                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1517                                       DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1518                                       "cleanup_dead_nodes: "
1519                                       "dns_rbt_deletenode: %s",
1520                                       isc_result_totext(result));
1521                 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1522                 count--;
1523         }
1524 }
1525
1526 /*
1527  * Caller must be holding the node lock if its reference must be protected
1528  * by the lock.
1529  */
1530 static inline void
1531 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1532         unsigned int lockrefs, noderefs;
1533         isc_refcount_t *lockref;
1534
1535         dns_rbtnode_refincrement0(node, &noderefs);
1536         if (noderefs == 1) {    /* this is the first reference to the node */
1537                 lockref = &rbtdb->node_locks[node->locknum].references;
1538                 isc_refcount_increment0(lockref, &lockrefs);
1539                 INSIST(lockrefs != 0);
1540         }
1541         INSIST(noderefs != 0);
1542 }
1543
1544 /*
1545  * This function is assumed to be called when a node is newly referenced
1546  * and can be in the deadnode list.  In that case the node must be retrieved
1547  * from the list because it is going to be used.  In addition, if the caller
1548  * happens to hold a write lock on the tree, it's a good chance to purge dead
1549  * nodes.
1550  * Note: while a new reference is gained in multiple places, there are only very
1551  * few cases where the node can be in the deadnode list (only empty nodes can
1552  * have been added to the list).
1553  */
1554 static inline void
1555 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1556                 isc_rwlocktype_t treelocktype)
1557 {
1558         isc_boolean_t need_relock = ISC_FALSE;
1559
1560         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1561         new_reference(rbtdb, node);
1562
1563         NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1564                       isc_rwlocktype_read);
1565         if (ISC_LINK_LINKED(node, deadlink))
1566                 need_relock = ISC_TRUE;
1567         else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1568                  treelocktype == isc_rwlocktype_write)
1569                 need_relock = ISC_TRUE;
1570         NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1571                         isc_rwlocktype_read);
1572         if (need_relock) {
1573                 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1574                               isc_rwlocktype_write);
1575                 if (ISC_LINK_LINKED(node, deadlink))
1576                         ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1577                                         node, deadlink);
1578                 if (treelocktype == isc_rwlocktype_write)
1579                         cleanup_dead_nodes(rbtdb, node->locknum);
1580                 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1581                                 isc_rwlocktype_write);
1582         }
1583
1584         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1585 }
1586
1587 /*
1588  * Caller must be holding the node lock; either the "strong", read or write
1589  * lock.  Note that the lock must be held even when node references are
1590  * atomically modified; in that case the decrement operation itself does not
1591  * have to be protected, but we must avoid a race condition where multiple
1592  * threads are decreasing the reference to zero simultaneously and at least
1593  * one of them is going to free the node.
1594  * This function returns ISC_TRUE if and only if the node reference decreases
1595  * to zero.
1596  */
1597 static isc_boolean_t
1598 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1599                     rbtdb_serial_t least_serial,
1600                     isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1601                     isc_boolean_t pruning)
1602 {
1603         isc_result_t result;
1604         isc_boolean_t write_locked;
1605         rbtdb_nodelock_t *nodelock;
1606         unsigned int refs, nrefs;
1607         int bucket = node->locknum;
1608         isc_boolean_t no_reference;
1609
1610         nodelock = &rbtdb->node_locks[bucket];
1611
1612         /* Handle easy and typical case first. */
1613         if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1614                 dns_rbtnode_refdecrement(node, &nrefs);
1615                 INSIST((int)nrefs >= 0);
1616                 if (nrefs == 0) {
1617                         isc_refcount_decrement(&nodelock->references, &refs);
1618                         INSIST((int)refs >= 0);
1619                 }
1620                 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1621         }
1622
1623         /* Upgrade the lock? */
1624         if (nlock == isc_rwlocktype_read) {
1625                 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1626                 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1627         }
1628         dns_rbtnode_refdecrement(node, &nrefs);
1629         INSIST((int)nrefs >= 0);
1630         if (nrefs > 0) {
1631                 /* Restore the lock? */
1632                 if (nlock == isc_rwlocktype_read)
1633                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1634                 return (ISC_FALSE);
1635         }
1636
1637         if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1638                 if (IS_CACHE(rbtdb))
1639                         clean_cache_node(rbtdb, node);
1640                 else {
1641                         if (least_serial == 0) {
1642                                 /*
1643                                  * Caller doesn't know the least serial.
1644                                  * Get it.
1645                                  */
1646                                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1647                                 least_serial = rbtdb->least_serial;
1648                                 RBTDB_UNLOCK(&rbtdb->lock,
1649                                              isc_rwlocktype_read);
1650                         }
1651                         clean_zone_node(rbtdb, node, least_serial);
1652                 }
1653         }
1654
1655         isc_refcount_decrement(&nodelock->references, &refs);
1656         INSIST((int)refs >= 0);
1657
1658         /*
1659          * XXXDCL should this only be done for cache zones?
1660          */
1661         if (node->data != NULL || node->down != NULL) {
1662                 /* Restore the lock? */
1663                 if (nlock == isc_rwlocktype_read)
1664                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1665                 return (ISC_TRUE);
1666         }
1667
1668         /*
1669          * Attempt to switch to a write lock on the tree.  If this fails,
1670          * we will add this node to a linked list of nodes in this locking
1671          * bucket which we will free later.
1672          */
1673         if (tlock != isc_rwlocktype_write) {
1674                 /*
1675                  * Locking hierarchy notwithstanding, we don't need to free
1676                  * the node lock before acquiring the tree write lock because
1677                  * we only do a trylock.
1678                  */
1679                 if (tlock == isc_rwlocktype_read)
1680                         result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1681                 else
1682                         result = isc_rwlock_trylock(&rbtdb->tree_lock,
1683                                                     isc_rwlocktype_write);
1684                 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1685                               result == ISC_R_LOCKBUSY);
1686
1687                 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1688         } else
1689                 write_locked = ISC_TRUE;
1690
1691         no_reference = ISC_TRUE;
1692         if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1693                 /*
1694                  * We can now delete the node if the reference counter is
1695                  * zero.  This should be typically the case, but a different
1696                  * thread may still gain a (new) reference just before the
1697                  * current thread locks the tree (e.g., in findnode()).
1698                  */
1699
1700                 /*
1701                  * If this node is the only one in the level it's in, deleting
1702                  * this node may recursively make its parent the only node in
1703                  * the parent level; if so, and if no one is currently using
1704                  * the parent node, this is almost the only opportunity to
1705                  * clean it up.  But the recursive cleanup is not that trivial
1706                  * since the child and parent may be in different lock buckets,
1707                  * which would cause a lock order reversal problem.  To avoid
1708                  * the trouble, we'll dispatch a separate event for batch
1709                  * cleaning.  We need to check whether we're deleting the node
1710                  * as a result of pruning to avoid infinite dispatching.
1711                  * Note: pruning happens only when a task has been set for the
1712                  * rbtdb.  If the user of the rbtdb chooses not to set a task,
1713                  * it's their responsibility to purge stale leaves (e.g. by
1714                  * periodic walk-through).
1715                  */
1716                 if (!pruning && node->parent != NULL &&
1717                     node->parent->down == node && node->left == NULL &&
1718                     node->right == NULL && rbtdb->task != NULL) {
1719                         isc_event_t *ev;
1720                         dns_db_t *db;
1721
1722                         ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1723                                                 DNS_EVENT_RBTPRUNE,
1724                                                 prune_tree, node,
1725                                                 sizeof(isc_event_t));
1726                         if (ev != NULL) {
1727                                 new_reference(rbtdb, node);
1728                                 db = NULL;
1729                                 attach((dns_db_t *)rbtdb, &db);
1730                                 ev->ev_sender = db;
1731                                 isc_task_send(rbtdb->task, &ev);
1732                                 no_reference = ISC_FALSE;
1733                         } else {
1734                                 /*
1735                                  * XXX: this is a weird situation.  We could
1736                                  * ignore this error case, but then the stale
1737                                  * node will unlikely be purged except via a
1738                                  * rare condition such as manual cleanup.  So
1739                                  * we queue it in the deadnodes list, hoping
1740                                  * the memory shortage is temporary and the node
1741                                  * will be deleted later.
1742                                  */
1743                                 isc_log_write(dns_lctx,
1744                                               DNS_LOGCATEGORY_DATABASE,
1745                                               DNS_LOGMODULE_CACHE,
1746                                               ISC_LOG_INFO,
1747                                               "decrement_reference: failed to "
1748                                               "allocate pruning event");
1749                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1750                                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1751                                                 deadlink);
1752                         }
1753                 } else {
1754                         if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1755                                 char printname[DNS_NAME_FORMATSIZE];
1756
1757                                 isc_log_write(dns_lctx,
1758                                               DNS_LOGCATEGORY_DATABASE,
1759                                               DNS_LOGMODULE_CACHE,
1760                                               ISC_LOG_DEBUG(1),
1761                                               "decrement_reference: "
1762                                               "delete from rbt: %p %s",
1763                                               node,
1764                                               dns_rbt_formatnodename(node,
1765                                                         printname,
1766                                                         sizeof(printname)));
1767                         }
1768
1769                         INSIST(!ISC_LINK_LINKED(node, deadlink));
1770                         if (node->nsec3)
1771                                 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1772                                                             ISC_FALSE);
1773                         else
1774                                 result = dns_rbt_deletenode(rbtdb->tree, node,
1775                                                             ISC_FALSE);
1776                         if (result != ISC_R_SUCCESS) {
1777                                 isc_log_write(dns_lctx,
1778                                               DNS_LOGCATEGORY_DATABASE,
1779                                               DNS_LOGMODULE_CACHE,
1780                                               ISC_LOG_WARNING,
1781                                               "decrement_reference: "
1782                                               "dns_rbt_deletenode: %s",
1783                                               isc_result_totext(result));
1784                         }
1785                 }
1786         } else if (dns_rbtnode_refcurrent(node) == 0) {
1787                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1788                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1789         } else
1790                 no_reference = ISC_FALSE;
1791
1792         /* Restore the lock? */
1793         if (nlock == isc_rwlocktype_read)
1794                 NODE_WEAKDOWNGRADE(&nodelock->lock);
1795
1796         /*
1797          * Relock a read lock, or unlock the write lock if no lock was held.
1798          */
1799         if (tlock == isc_rwlocktype_none)
1800                 if (write_locked)
1801                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1802
1803         if (tlock == isc_rwlocktype_read)
1804                 if (write_locked)
1805                         isc_rwlock_downgrade(&rbtdb->tree_lock);
1806
1807         return (no_reference);
1808 }
1809
1810 /*
1811  * Prune the tree by recursively cleaning-up single leaves.  In the worst
1812  * case, the number of iteration is the number of tree levels, which is at
1813  * most the maximum number of domain name labels, i.e, 127.  In practice, this
1814  * should be much smaller (only a few times), and even the worst case would be
1815  * acceptable for a single event.
1816  */
1817 static void
1818 prune_tree(isc_task_t *task, isc_event_t *event) {
1819         dns_rbtdb_t *rbtdb = event->ev_sender;
1820         dns_rbtnode_t *node = event->ev_arg;
1821         dns_rbtnode_t *parent;
1822         unsigned int locknum;
1823
1824         UNUSED(task);
1825
1826         isc_event_free(&event);
1827
1828         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1829         locknum = node->locknum;
1830         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1831         do {
1832                 parent = node->parent;
1833                 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1834                                     isc_rwlocktype_write, ISC_TRUE);
1835
1836                 if (parent != NULL && parent->down == NULL) {
1837                         /*
1838                          * node was the only down child of the parent and has
1839                          * just been removed.  We'll then need to examine the
1840                          * parent.  Keep the lock if possible; otherwise,
1841                          * release the old lock and acquire one for the parent.
1842                          */
1843                         if (parent->locknum != locknum) {
1844                                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1845                                             isc_rwlocktype_write);
1846                                 locknum = parent->locknum;
1847                                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1848                                           isc_rwlocktype_write);
1849                         }
1850
1851                         /*
1852                          * We need to gain a reference to the node before
1853                          * decrementing it in the next iteration.  In addition,
1854                          * if the node is in the dead-nodes list, extract it
1855                          * from the list beforehand as we do in
1856                          * reactivate_node().
1857                          */
1858                         new_reference(rbtdb, parent);
1859                         if (ISC_LINK_LINKED(parent, deadlink)) {
1860                                 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1861                                                 parent, deadlink);
1862                         }
1863                 } else
1864                         parent = NULL;
1865
1866                 node = parent;
1867         } while (node != NULL);
1868         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1869         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1870
1871         detach((dns_db_t **)&rbtdb);
1872 }
1873
1874 static inline void
1875 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1876                    rbtdb_changedlist_t *cleanup_list)
1877 {
1878         /*
1879          * Caller must be holding the database lock.
1880          */
1881
1882         rbtdb->least_serial = version->serial;
1883         *cleanup_list = version->changed_list;
1884         ISC_LIST_INIT(version->changed_list);
1885 }
1886
1887 static inline void
1888 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1889         rbtdb_changed_t *changed, *next_changed;
1890
1891         /*
1892          * If the changed record is dirty, then
1893          * an update created multiple versions of
1894          * a given rdataset.  We keep this list
1895          * until we're the least open version, at
1896          * which point it's safe to get rid of any
1897          * older versions.
1898          *
1899          * If the changed record isn't dirty, then
1900          * we don't need it anymore since we're
1901          * committing and not rolling back.
1902          *
1903          * The caller must be holding the database lock.
1904          */
1905         for (changed = HEAD(version->changed_list);
1906              changed != NULL;
1907              changed = next_changed) {
1908                 next_changed = NEXT(changed, link);
1909                 if (!changed->dirty) {
1910                         UNLINK(version->changed_list,
1911                                changed, link);
1912                         APPEND(*cleanup_list,
1913                                changed, link);
1914                 }
1915         }
1916 }
1917
1918 static void
1919 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1920         dns_rdataset_t keyset;
1921         dns_rdataset_t nsecset, signsecset;
1922         dns_rdata_t rdata = DNS_RDATA_INIT;
1923         isc_boolean_t haszonekey = ISC_FALSE;
1924         isc_boolean_t hasnsec = ISC_FALSE;
1925         isc_boolean_t hasoptbit = ISC_FALSE;
1926         isc_boolean_t nsec3createflag = ISC_FALSE;
1927         isc_result_t result;
1928
1929         dns_rdataset_init(&keyset);
1930         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1931                                      0, 0, &keyset, NULL);
1932         if (result == ISC_R_SUCCESS) {
1933                 dns_rdata_t keyrdata = DNS_RDATA_INIT;
1934                 result = dns_rdataset_first(&keyset);
1935                 while (result == ISC_R_SUCCESS) {
1936                         dns_rdataset_current(&keyset, &keyrdata);
1937                         if (dns_zonekey_iszonekey(&keyrdata)) {
1938                                 haszonekey = ISC_TRUE;
1939                                 break;
1940                         }
1941                         result = dns_rdataset_next(&keyset);
1942                 }
1943                 dns_rdataset_disassociate(&keyset);
1944         }
1945         if (!haszonekey) {
1946                 version->secure = dns_db_insecure;
1947                 version->havensec3 = ISC_FALSE;
1948                 return;
1949         }
1950
1951         dns_rdataset_init(&nsecset);
1952         dns_rdataset_init(&signsecset);
1953         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
1954                                      0, 0, &nsecset, &signsecset);
1955         if (result == ISC_R_SUCCESS) {
1956                 if (dns_rdataset_isassociated(&signsecset)) {
1957                         hasnsec = ISC_TRUE;
1958                         result = dns_rdataset_first(&nsecset);
1959                         if (result == ISC_R_SUCCESS) {
1960                                 dns_rdataset_current(&nsecset, &rdata);
1961                                 hasoptbit = dns_nsec_typepresent(&rdata,
1962                                                              dns_rdatatype_opt);
1963                         }
1964                         dns_rdataset_disassociate(&signsecset);
1965                 }
1966                 dns_rdataset_disassociate(&nsecset);
1967         }
1968
1969         setnsec3parameters(db, version, &nsec3createflag);
1970
1971         /*
1972          * Do we have a valid NSEC/NSEC3 chain?
1973          */
1974         if (version->havensec3 || (hasnsec && !hasoptbit))
1975                 version->secure = dns_db_secure;
1976         /*
1977          * Do we have a NSEC/NSEC3 chain under creation?
1978          */
1979         else if (hasoptbit || nsec3createflag)
1980                 version->secure = dns_db_partial;
1981         else
1982                 version->secure = dns_db_insecure;
1983 }
1984
1985 /*%<
1986  * Walk the origin node looking for NSEC3PARAM records.
1987  * Cache the nsec3 parameters.
1988  */
1989 static void
1990 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
1991                    isc_boolean_t *nsec3createflag)
1992 {
1993         dns_rbtnode_t *node;
1994         dns_rdata_nsec3param_t nsec3param;
1995         dns_rdata_t rdata = DNS_RDATA_INIT;
1996         isc_region_t region;
1997         isc_result_t result;
1998         rdatasetheader_t *header, *header_next;
1999         unsigned char *raw;             /* RDATASLAB */
2000         unsigned int count, length;
2001         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2002
2003         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2004         version->havensec3 = ISC_FALSE;
2005         node = rbtdb->origin_node;
2006         NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2007                   isc_rwlocktype_read);
2008         for (header = node->data;
2009              header != NULL;
2010              header = header_next) {
2011                 header_next = header->next;
2012                 do {
2013                         if (header->serial <= version->serial &&
2014                             !IGNORE(header)) {
2015                                 if (NONEXISTENT(header))
2016                                         header = NULL;
2017                                 break;
2018                         } else
2019                                 header = header->down;
2020                 } while (header != NULL);
2021
2022                 if (header != NULL &&
2023                     header->type == dns_rdatatype_nsec3param) {
2024                         /*
2025                          * Find A NSEC3PARAM with a supported algorithm.
2026                          */
2027                         raw = (unsigned char *)header + sizeof(*header);
2028                         count = raw[0] * 256 + raw[1]; /* count */
2029 #if DNS_RDATASET_FIXED
2030                         raw += count * 4 + 2;
2031 #else
2032                         raw += 2;
2033 #endif
2034                         while (count-- > 0U) {
2035                                 length = raw[0] * 256 + raw[1];
2036 #if DNS_RDATASET_FIXED
2037                                 raw += 4;
2038 #else
2039                                 raw += 2;
2040 #endif
2041                                 region.base = raw;
2042                                 region.length = length;
2043                                 raw += length;
2044                                 dns_rdata_fromregion(&rdata,
2045                                                      rbtdb->common.rdclass,
2046                                                      dns_rdatatype_nsec3param,
2047                                                      &region);
2048                                 result = dns_rdata_tostruct(&rdata,
2049                                                             &nsec3param,
2050                                                             NULL);
2051                                 INSIST(result == ISC_R_SUCCESS);
2052                                 dns_rdata_reset(&rdata);
2053
2054                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2055                                     !dns_nsec3_supportedhash(nsec3param.hash))
2056                                         continue;
2057
2058 #ifdef RFC5155_STRICT
2059                                 if (nsec3param.flags != 0)
2060                                         continue;
2061 #else
2062                                 if ((nsec3param.flags & DNS_NSEC3FLAG_CREATE)
2063                                     != 0)
2064                                         *nsec3createflag = ISC_TRUE;
2065                                 if ((nsec3param.flags & ~DNS_NSEC3FLAG_OPTOUT)
2066                                     != 0)
2067                                         continue;
2068 #endif
2069
2070                                 memcpy(version->salt, nsec3param.salt,
2071                                        nsec3param.salt_length);
2072                                 version->hash = nsec3param.hash;
2073                                 version->salt_length = nsec3param.salt_length;
2074                                 version->iterations = nsec3param.iterations;
2075                                 version->flags = nsec3param.flags;
2076                                 version->havensec3 = ISC_TRUE;
2077                                 /*
2078                                  * Look for a better algorithm than the
2079                                  * unknown test algorithm.
2080                                  */
2081                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2082                                         goto unlock;
2083                         }
2084                 }
2085         }
2086  unlock:
2087         NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2088                     isc_rwlocktype_read);
2089         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2090 }
2091
2092 static void
2093 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2094         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2095         rbtdb_version_t *version, *cleanup_version, *least_greater;
2096         isc_boolean_t rollback = ISC_FALSE;
2097         rbtdb_changedlist_t cleanup_list;
2098         rdatasetheaderlist_t resigned_list;
2099         rbtdb_changed_t *changed, *next_changed;
2100         rbtdb_serial_t serial, least_serial;
2101         dns_rbtnode_t *rbtnode;
2102         unsigned int refs;
2103         rdatasetheader_t *header;
2104         isc_boolean_t writer;
2105
2106         REQUIRE(VALID_RBTDB(rbtdb));
2107         version = (rbtdb_version_t *)*versionp;
2108
2109         cleanup_version = NULL;
2110         ISC_LIST_INIT(cleanup_list);
2111         ISC_LIST_INIT(resigned_list);
2112
2113         isc_refcount_decrement(&version->references, &refs);
2114         if (refs > 0) {         /* typical and easy case first */
2115                 if (commit) {
2116                         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2117                         INSIST(!version->writer);
2118                         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2119                 }
2120                 goto end;
2121         }
2122
2123         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2124         serial = version->serial;
2125         writer = version->writer;
2126         if (version->writer) {
2127                 if (commit) {
2128                         unsigned cur_ref;
2129                         rbtdb_version_t *cur_version;
2130
2131                         INSIST(version->commit_ok);
2132                         INSIST(version == rbtdb->future_version);
2133                         /*
2134                          * The current version is going to be replaced.
2135                          * Release the (likely last) reference to it from the
2136                          * DB itself and unlink it from the open list.
2137                          */
2138                         cur_version = rbtdb->current_version;
2139                         isc_refcount_decrement(&cur_version->references,
2140                                                &cur_ref);
2141                         if (cur_ref == 0) {
2142                                 if (cur_version->serial == rbtdb->least_serial)
2143                                         INSIST(EMPTY(cur_version->changed_list));
2144                                 UNLINK(rbtdb->open_versions,
2145                                        cur_version, link);
2146                         }
2147                         if (EMPTY(rbtdb->open_versions)) {
2148                                 /*
2149                                  * We're going to become the least open
2150                                  * version.
2151                                  */
2152                                 make_least_version(rbtdb, version,
2153                                                    &cleanup_list);
2154                         } else {
2155                                 /*
2156                                  * Some other open version is the
2157                                  * least version.  We can't cleanup
2158                                  * records that were changed in this
2159                                  * version because the older versions
2160                                  * may still be in use by an open
2161                                  * version.
2162                                  *
2163                                  * We can, however, discard the
2164                                  * changed records for things that
2165                                  * we've added that didn't exist in
2166                                  * prior versions.
2167                                  */
2168                                 cleanup_nondirty(version, &cleanup_list);
2169                         }
2170                         /*
2171                          * If the (soon to be former) current version
2172                          * isn't being used by anyone, we can clean
2173                          * it up.
2174                          */
2175                         if (cur_ref == 0) {
2176                                 cleanup_version = cur_version;
2177                                 APPENDLIST(version->changed_list,
2178                                            cleanup_version->changed_list,
2179                                            link);
2180                         }
2181                         /*
2182                          * Become the current version.
2183                          */
2184                         version->writer = ISC_FALSE;
2185                         rbtdb->current_version = version;
2186                         rbtdb->current_serial = version->serial;
2187                         rbtdb->future_version = NULL;
2188
2189                         /*
2190                          * Keep the current version in the open list, and
2191                          * gain a reference for the DB itself (see the DB
2192                          * creation function below).  This must be the only
2193                          * case where we need to increment the counter from
2194                          * zero and need to use isc_refcount_increment0().
2195                          */
2196                         isc_refcount_increment0(&version->references,
2197                                                 &cur_ref);
2198                         INSIST(cur_ref == 1);
2199                         PREPEND(rbtdb->open_versions,
2200                                 rbtdb->current_version, link);
2201                         resigned_list = version->resigned_list;
2202                         ISC_LIST_INIT(version->resigned_list);
2203                 } else {
2204                         /*
2205                          * We're rolling back this transaction.
2206                          */
2207                         cleanup_list = version->changed_list;
2208                         ISC_LIST_INIT(version->changed_list);
2209                         resigned_list = version->resigned_list;
2210                         ISC_LIST_INIT(version->resigned_list);
2211                         rollback = ISC_TRUE;
2212                         cleanup_version = version;
2213                         rbtdb->future_version = NULL;
2214                 }
2215         } else {
2216                 if (version != rbtdb->current_version) {
2217                         /*
2218                          * There are no external or internal references
2219                          * to this version and it can be cleaned up.
2220                          */
2221                         cleanup_version = version;
2222
2223                         /*
2224                          * Find the version with the least serial
2225                          * number greater than ours.
2226                          */
2227                         least_greater = PREV(version, link);
2228                         if (least_greater == NULL)
2229                                 least_greater = rbtdb->current_version;
2230
2231                         INSIST(version->serial < least_greater->serial);
2232                         /*
2233                          * Is this the least open version?
2234                          */
2235                         if (version->serial == rbtdb->least_serial) {
2236                                 /*
2237                                  * Yes.  Install the new least open
2238                                  * version.
2239                                  */
2240                                 make_least_version(rbtdb,
2241                                                    least_greater,
2242                                                    &cleanup_list);
2243                         } else {
2244                                 /*
2245                                  * Add any unexecuted cleanups to
2246                                  * those of the least greater version.
2247                                  */
2248                                 APPENDLIST(least_greater->changed_list,
2249                                            version->changed_list,
2250                                            link);
2251                         }
2252                 } else if (version->serial == rbtdb->least_serial)
2253                         INSIST(EMPTY(version->changed_list));
2254                 UNLINK(rbtdb->open_versions, version, link);
2255         }
2256         least_serial = rbtdb->least_serial;
2257         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2258
2259         /*
2260          * Update the zone's secure status.
2261          */
2262         if (writer && commit && !IS_CACHE(rbtdb))
2263                 iszonesecure(db, version, rbtdb->origin_node);
2264
2265         if (cleanup_version != NULL) {
2266                 INSIST(EMPTY(cleanup_version->changed_list));
2267                 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2268                             sizeof(*cleanup_version));
2269         }
2270
2271         /*
2272          * Commit/rollback re-signed headers.
2273          */
2274         for (header = HEAD(resigned_list);
2275              header != NULL;
2276              header = HEAD(resigned_list)) {
2277                 nodelock_t *lock;
2278
2279                 ISC_LIST_UNLINK(resigned_list, header, link);
2280
2281                 lock = &rbtdb->node_locks[header->node->locknum].lock;
2282                 NODE_LOCK(lock, isc_rwlocktype_write);
2283                 if (rollback)
2284                         resign_insert(rbtdb, header->node->locknum, header);
2285                 decrement_reference(rbtdb, header->node, least_serial,
2286                                     isc_rwlocktype_write, isc_rwlocktype_none,
2287                                     ISC_FALSE);
2288                 NODE_UNLOCK(lock, isc_rwlocktype_write);
2289         }
2290
2291         if (!EMPTY(cleanup_list)) {
2292                 /*
2293                  * We acquire a tree write lock here in order to make sure
2294                  * that stale nodes will be removed in decrement_reference().
2295                  * If we didn't have the lock, those nodes could miss the
2296                  * chance to be removed until the server stops.  The write lock
2297                  * is expensive, but this event should be rare enough to justify
2298                  * the cost.
2299                  */
2300                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2301                 for (changed = HEAD(cleanup_list);
2302                      changed != NULL;
2303                      changed = next_changed) {
2304                         nodelock_t *lock;
2305
2306                         next_changed = NEXT(changed, link);
2307                         rbtnode = changed->node;
2308                         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2309
2310                         NODE_LOCK(lock, isc_rwlocktype_write);
2311                         /*
2312                          * This is a good opportunity to purge any dead nodes,
2313                          * so use it.
2314                          */
2315                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2316
2317                         if (rollback)
2318                                 rollback_node(rbtnode, serial);
2319                         decrement_reference(rbtdb, rbtnode, least_serial,
2320                                             isc_rwlocktype_write,
2321                                             isc_rwlocktype_write, ISC_FALSE);
2322
2323                         NODE_UNLOCK(lock, isc_rwlocktype_write);
2324
2325                         isc_mem_put(rbtdb->common.mctx, changed,
2326                                     sizeof(*changed));
2327                 }
2328                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2329         }
2330
2331  end:
2332         *versionp = NULL;
2333 }
2334
2335 /*
2336  * Add the necessary magic for the wildcard name 'name'
2337  * to be found in 'rbtdb'.
2338  *
2339  * In order for wildcard matching to work correctly in
2340  * zone_find(), we must ensure that a node for the wildcarding
2341  * level exists in the database, and has its 'find_callback'
2342  * and 'wild' bits set.
2343  *
2344  * E.g. if the wildcard name is "*.sub.example." then we
2345  * must ensure that "sub.example." exists and is marked as
2346  * a wildcard level.
2347  */
2348 static isc_result_t
2349 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2350         isc_result_t result;
2351         dns_name_t foundname;
2352         dns_offsets_t offsets;
2353         unsigned int n;
2354         dns_rbtnode_t *node = NULL;
2355
2356         dns_name_init(&foundname, offsets);
2357         n = dns_name_countlabels(name);
2358         INSIST(n >= 2);
2359         n--;
2360         dns_name_getlabelsequence(name, 1, n, &foundname);
2361         result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2362         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2363                 return (result);
2364         node->nsec3 = 0;
2365         node->find_callback = 1;
2366         node->wild = 1;
2367         return (ISC_R_SUCCESS);
2368 }
2369
2370 static isc_result_t
2371 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2372         isc_result_t result;
2373         dns_name_t foundname;
2374         dns_offsets_t offsets;
2375         unsigned int n, l, i;
2376
2377         dns_name_init(&foundname, offsets);
2378         n = dns_name_countlabels(name);
2379         l = dns_name_countlabels(&rbtdb->common.origin);
2380         i = l + 1;
2381         while (i < n) {
2382                 dns_rbtnode_t *node = NULL;     /* dummy */
2383                 dns_name_getlabelsequence(name, n - i, i, &foundname);
2384                 if (dns_name_iswildcard(&foundname)) {
2385                         result = add_wildcard_magic(rbtdb, &foundname);
2386                         if (result != ISC_R_SUCCESS)
2387                                 return (result);
2388                         result = dns_rbt_addnode(rbtdb->tree, &foundname,
2389                                                  &node);
2390                         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2391                                 return (result);
2392                         node->nsec3 = 0;
2393                 }
2394                 i++;
2395         }
2396         return (ISC_R_SUCCESS);
2397 }
2398
2399 static isc_result_t
2400 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2401          dns_dbnode_t **nodep)
2402 {
2403         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2404         dns_rbtnode_t *node = NULL;
2405         dns_name_t nodename;
2406         isc_result_t result;
2407         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2408
2409         REQUIRE(VALID_RBTDB(rbtdb));
2410
2411         dns_name_init(&nodename, NULL);
2412         RWLOCK(&rbtdb->tree_lock, locktype);
2413         result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2414                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2415         if (result != ISC_R_SUCCESS) {
2416                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2417                 if (!create) {
2418                         if (result == DNS_R_PARTIALMATCH)
2419                                 result = ISC_R_NOTFOUND;
2420                         return (result);
2421                 }
2422                 /*
2423                  * It would be nice to try to upgrade the lock instead of
2424                  * unlocking then relocking.
2425                  */
2426                 locktype = isc_rwlocktype_write;
2427                 RWLOCK(&rbtdb->tree_lock, locktype);
2428                 node = NULL;
2429                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2430                 if (result == ISC_R_SUCCESS) {
2431                         dns_rbt_namefromnode(node, &nodename);
2432 #ifdef DNS_RBT_USEHASH
2433                         node->locknum = node->hashval % rbtdb->node_lock_count;
2434 #else
2435                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2436                                 rbtdb->node_lock_count;
2437 #endif
2438                         node->nsec3 = 0;
2439                         add_empty_wildcards(rbtdb, name);
2440
2441                         if (dns_name_iswildcard(name)) {
2442                                 result = add_wildcard_magic(rbtdb, name);
2443                                 if (result != ISC_R_SUCCESS) {
2444                                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2445                                         return (result);
2446                                 }
2447                         }
2448                 } else if (result != ISC_R_EXISTS) {
2449                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2450                         return (result);
2451                 }
2452         }
2453         reactivate_node(rbtdb, node, locktype);
2454         RWUNLOCK(&rbtdb->tree_lock, locktype);
2455
2456         *nodep = (dns_dbnode_t *)node;
2457
2458         return (ISC_R_SUCCESS);
2459 }
2460
2461 static isc_result_t
2462 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2463               dns_dbnode_t **nodep)
2464 {
2465         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2466         dns_rbtnode_t *node = NULL;
2467         dns_name_t nodename;
2468         isc_result_t result;
2469         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2470
2471         REQUIRE(VALID_RBTDB(rbtdb));
2472
2473         dns_name_init(&nodename, NULL);
2474         RWLOCK(&rbtdb->tree_lock, locktype);
2475         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, &node, NULL,
2476                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2477         if (result != ISC_R_SUCCESS) {
2478                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2479                 if (!create) {
2480                         if (result == DNS_R_PARTIALMATCH)
2481                                 result = ISC_R_NOTFOUND;
2482                         return (result);
2483                 }
2484                 /*
2485                  * It would be nice to try to upgrade the lock instead of
2486                  * unlocking then relocking.
2487                  */
2488                 locktype = isc_rwlocktype_write;
2489                 RWLOCK(&rbtdb->tree_lock, locktype);
2490                 node = NULL;
2491                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
2492                 if (result == ISC_R_SUCCESS) {
2493                         dns_rbt_namefromnode(node, &nodename);
2494 #ifdef DNS_RBT_USEHASH
2495                         node->locknum = node->hashval % rbtdb->node_lock_count;
2496 #else
2497                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2498                                 rbtdb->node_lock_count;
2499 #endif
2500                         node->nsec3 = 1U;
2501                 } else if (result != ISC_R_EXISTS) {
2502                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2503                         return (result);
2504                 }
2505         } else
2506                 INSIST(node->nsec3);
2507         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
2508         new_reference(rbtdb, node);
2509         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
2510         RWUNLOCK(&rbtdb->tree_lock, locktype);
2511
2512         *nodep = (dns_dbnode_t *)node;
2513
2514         return (ISC_R_SUCCESS);
2515 }
2516
2517 static isc_result_t
2518 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2519         rbtdb_search_t *search = arg;
2520         rdatasetheader_t *header, *header_next;
2521         rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2522         rdatasetheader_t *found;
2523         isc_result_t result;
2524         dns_rbtnode_t *onode;
2525
2526         /*
2527          * We only want to remember the topmost zone cut, since it's the one
2528          * that counts, so we'll just continue if we've already found a
2529          * zonecut.
2530          */
2531         if (search->zonecut != NULL)
2532                 return (DNS_R_CONTINUE);
2533
2534         found = NULL;
2535         result = DNS_R_CONTINUE;
2536         onode = search->rbtdb->origin_node;
2537
2538         NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2539                   isc_rwlocktype_read);
2540
2541         /*
2542          * Look for an NS or DNAME rdataset active in our version.
2543          */
2544         ns_header = NULL;
2545         dname_header = NULL;
2546         sigdname_header = NULL;
2547         for (header = node->data; header != NULL; header = header_next) {
2548                 header_next = header->next;
2549                 if (header->type == dns_rdatatype_ns ||
2550                     header->type == dns_rdatatype_dname ||
2551                     header->type == RBTDB_RDATATYPE_SIGDNAME) {
2552                         do {
2553                                 if (header->serial <= search->serial &&
2554                                     !IGNORE(header)) {
2555                                         /*
2556                                          * Is this a "this rdataset doesn't
2557                                          * exist" record?
2558                                          */
2559                                         if (NONEXISTENT(header))
2560                                                 header = NULL;
2561                                         break;
2562                                 } else
2563                                         header = header->down;
2564                         } while (header != NULL);
2565                         if (header != NULL) {
2566                                 if (header->type == dns_rdatatype_dname)
2567                                         dname_header = header;
2568                                 else if (header->type ==
2569                                            RBTDB_RDATATYPE_SIGDNAME)
2570                                         sigdname_header = header;
2571                                 else if (node != onode ||
2572                                          IS_STUB(search->rbtdb)) {
2573                                         /*
2574                                          * We've found an NS rdataset that
2575                                          * isn't at the origin node.  We check
2576                                          * that they're not at the origin node,
2577                                          * because otherwise we'd erroneously
2578                                          * treat the zone top as if it were
2579                                          * a delegation.
2580                                          */
2581                                         ns_header = header;
2582                                 }
2583                         }
2584                 }
2585         }
2586
2587         /*
2588          * Did we find anything?
2589          */
2590         if (dname_header != NULL) {
2591                 /*
2592                  * Note that DNAME has precedence over NS if both exist.
2593                  */
2594                 found = dname_header;
2595                 search->zonecut_sigrdataset = sigdname_header;
2596         } else if (ns_header != NULL) {
2597                 found = ns_header;
2598                 search->zonecut_sigrdataset = NULL;
2599         }
2600
2601         if (found != NULL) {
2602                 /*
2603                  * We increment the reference count on node to ensure that
2604                  * search->zonecut_rdataset will still be valid later.
2605                  */
2606                 new_reference(search->rbtdb, node);
2607                 search->zonecut = node;
2608                 search->zonecut_rdataset = found;
2609                 search->need_cleanup = ISC_TRUE;
2610                 /*
2611                  * Since we've found a zonecut, anything beneath it is
2612                  * glue and is not subject to wildcard matching, so we
2613                  * may clear search->wild.
2614                  */
2615                 search->wild = ISC_FALSE;
2616                 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2617                         /*
2618                          * If the caller does not want to find glue, then
2619                          * this is the best answer and the search should
2620                          * stop now.
2621                          */
2622                         result = DNS_R_PARTIALMATCH;
2623                 } else {
2624                         dns_name_t *zcname;
2625
2626                         /*
2627                          * The search will continue beneath the zone cut.
2628                          * This may or may not be the best match.  In case it
2629                          * is, we need to remember the node name.
2630                          */
2631                         zcname = dns_fixedname_name(&search->zonecut_name);
2632                         RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2633                                       ISC_R_SUCCESS);
2634                         search->copy_name = ISC_TRUE;
2635                 }
2636         } else {
2637                 /*
2638                  * There is no zonecut at this node which is active in this
2639                  * version.
2640                  *
2641                  * If this is a "wild" node and the caller hasn't disabled
2642                  * wildcard matching, remember that we've seen a wild node
2643                  * in case we need to go searching for wildcard matches
2644                  * later on.
2645                  */
2646                 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2647                         search->wild = ISC_TRUE;
2648         }
2649
2650         NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2651                     isc_rwlocktype_read);
2652
2653         return (result);
2654 }
2655
2656 static inline void
2657 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2658               rdatasetheader_t *header, isc_stdtime_t now,
2659               dns_rdataset_t *rdataset)
2660 {
2661         unsigned char *raw;     /* RDATASLAB */
2662
2663         /*
2664          * Caller must be holding the node reader lock.
2665          * XXXJT: technically, we need a writer lock, since we'll increment
2666          * the header count below.  However, since the actual counter value
2667          * doesn't matter, we prioritize performance here.  (We may want to
2668          * use atomic increment when available).
2669          */
2670
2671         if (rdataset == NULL)
2672                 return;
2673
2674         new_reference(rbtdb, node);
2675
2676         INSIST(rdataset->methods == NULL);      /* We must be disassociated. */
2677
2678         rdataset->methods = &rdataset_methods;
2679         rdataset->rdclass = rbtdb->common.rdclass;
2680         rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2681         rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2682         rdataset->ttl = header->rdh_ttl - now;
2683         rdataset->trust = header->trust;
2684         if (NXDOMAIN(header))
2685                 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2686         if (OPTOUT(header))
2687                 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2688         rdataset->private1 = rbtdb;
2689         rdataset->private2 = node;
2690         raw = (unsigned char *)header + sizeof(*header);
2691         rdataset->private3 = raw;
2692         rdataset->count = header->count++;
2693         if (rdataset->count == ISC_UINT32_MAX)
2694                 rdataset->count = 0;
2695
2696         /*
2697          * Reset iterator state.
2698          */
2699         rdataset->privateuint4 = 0;
2700         rdataset->private5 = NULL;
2701
2702         /*
2703          * Add noqname proof.
2704          */
2705         rdataset->private6 = header->noqname;
2706         if (rdataset->private6 != NULL)
2707                 rdataset->attributes |=  DNS_RDATASETATTR_NOQNAME;
2708         rdataset->private7 = header->closest;
2709         if (rdataset->private7 != NULL)
2710                 rdataset->attributes |=  DNS_RDATASETATTR_CLOSEST;
2711
2712         /*
2713          * Copy out re-signing information.
2714          */
2715         if (RESIGN(header)) {
2716                 rdataset->attributes |=  DNS_RDATASETATTR_RESIGN;
2717                 rdataset->resign = header->resign;
2718         } else
2719                 rdataset->resign = 0;
2720 }
2721
2722 static inline isc_result_t
2723 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2724                  dns_name_t *foundname, dns_rdataset_t *rdataset,
2725                  dns_rdataset_t *sigrdataset)
2726 {
2727         isc_result_t result;
2728         dns_name_t *zcname;
2729         rbtdb_rdatatype_t type;
2730         dns_rbtnode_t *node;
2731
2732         /*
2733          * The caller MUST NOT be holding any node locks.
2734          */
2735
2736         node = search->zonecut;
2737         type = search->zonecut_rdataset->type;
2738
2739         /*
2740          * If we have to set foundname, we do it before anything else.
2741          * If we were to set foundname after we had set nodep or bound the
2742          * rdataset, then we'd have to undo that work if dns_name_copy()
2743          * failed.  By setting foundname first, there's nothing to undo if
2744          * we have trouble.
2745          */
2746         if (foundname != NULL && search->copy_name) {
2747                 zcname = dns_fixedname_name(&search->zonecut_name);
2748                 result = dns_name_copy(zcname, foundname, NULL);
2749                 if (result != ISC_R_SUCCESS)
2750                         return (result);
2751         }
2752         if (nodep != NULL) {
2753                 /*
2754                  * Note that we don't have to increment the node's reference
2755                  * count here because we're going to use the reference we
2756                  * already have in the search block.
2757                  */
2758                 *nodep = node;
2759                 search->need_cleanup = ISC_FALSE;
2760         }
2761         if (rdataset != NULL) {
2762                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2763                           isc_rwlocktype_read);
2764                 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2765                               search->now, rdataset);
2766                 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2767                         bind_rdataset(search->rbtdb, node,
2768                                       search->zonecut_sigrdataset,
2769                                       search->now, sigrdataset);
2770                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2771                             isc_rwlocktype_read);
2772         }
2773
2774         if (type == dns_rdatatype_dname)
2775                 return (DNS_R_DNAME);
2776         return (DNS_R_DELEGATION);
2777 }
2778
2779 static inline isc_boolean_t
2780 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2781            dns_rbtnode_t *node)
2782 {
2783         unsigned char *raw;     /* RDATASLAB */
2784         unsigned int count, size;
2785         dns_name_t ns_name;
2786         isc_boolean_t valid = ISC_FALSE;
2787         dns_offsets_t offsets;
2788         isc_region_t region;
2789         rdatasetheader_t *header;
2790
2791         /*
2792          * No additional locking is required.
2793          */
2794
2795         /*
2796          * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
2797          * if it occurs at a zone cut, but is not valid below it.
2798          */
2799         if (type == dns_rdatatype_ns) {
2800                 if (node != search->zonecut) {
2801                         return (ISC_FALSE);
2802                 }
2803         } else if (type != dns_rdatatype_a &&
2804                    type != dns_rdatatype_aaaa &&
2805                    type != dns_rdatatype_a6) {
2806                 return (ISC_FALSE);
2807         }
2808
2809         header = search->zonecut_rdataset;
2810         raw = (unsigned char *)header + sizeof(*header);
2811         count = raw[0] * 256 + raw[1];
2812 #if DNS_RDATASET_FIXED
2813         raw += 2 + (4 * count);
2814 #else
2815         raw += 2;
2816 #endif
2817
2818         while (count > 0) {
2819                 count--;
2820                 size = raw[0] * 256 + raw[1];
2821 #if DNS_RDATASET_FIXED
2822                 raw += 4;
2823 #else
2824                 raw += 2;
2825 #endif
2826                 region.base = raw;
2827                 region.length = size;
2828                 raw += size;
2829                 /*
2830                  * XXX Until we have rdata structures, we have no choice but
2831                  * to directly access the rdata format.
2832                  */
2833                 dns_name_init(&ns_name, offsets);
2834                 dns_name_fromregion(&ns_name, &region);
2835                 if (dns_name_compare(&ns_name, name) == 0) {
2836                         valid = ISC_TRUE;
2837                         break;
2838                 }
2839         }
2840
2841         return (valid);
2842 }
2843
2844 static inline isc_boolean_t
2845 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2846             dns_name_t *name)
2847 {
2848         dns_fixedname_t fnext;
2849         dns_fixedname_t forigin;
2850         dns_name_t *next;
2851         dns_name_t *origin;
2852         dns_name_t prefix;
2853         dns_rbtdb_t *rbtdb;
2854         dns_rbtnode_t *node;
2855         isc_result_t result;
2856         isc_boolean_t answer = ISC_FALSE;
2857         rdatasetheader_t *header;
2858
2859         rbtdb = search->rbtdb;
2860
2861         dns_name_init(&prefix, NULL);
2862         dns_fixedname_init(&fnext);
2863         next = dns_fixedname_name(&fnext);
2864         dns_fixedname_init(&forigin);
2865         origin = dns_fixedname_name(&forigin);
2866
2867         result = dns_rbtnodechain_next(chain, NULL, NULL);
2868         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2869                 node = NULL;
2870                 result = dns_rbtnodechain_current(chain, &prefix,
2871                                                   origin, &node);
2872                 if (result != ISC_R_SUCCESS)
2873                         break;
2874                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2875                           isc_rwlocktype_read);
2876                 for (header = node->data;
2877                      header != NULL;
2878                      header = header->next) {
2879                         if (header->serial <= search->serial &&
2880                             !IGNORE(header) && EXISTS(header))
2881                                 break;
2882                 }
2883                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2884                             isc_rwlocktype_read);
2885                 if (header != NULL)
2886                         break;
2887                 result = dns_rbtnodechain_next(chain, NULL, NULL);
2888         }
2889         if (result == ISC_R_SUCCESS)
2890                 result = dns_name_concatenate(&prefix, origin, next, NULL);
2891         if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
2892                 answer = ISC_TRUE;
2893         return (answer);
2894 }
2895
2896 static inline isc_boolean_t
2897 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2898         dns_fixedname_t fnext;
2899         dns_fixedname_t forigin;
2900         dns_fixedname_t fprev;
2901         dns_name_t *next;
2902         dns_name_t *origin;
2903         dns_name_t *prev;
2904         dns_name_t name;
2905         dns_name_t rname;
2906         dns_name_t tname;
2907         dns_rbtdb_t *rbtdb;
2908         dns_rbtnode_t *node;
2909         dns_rbtnodechain_t chain;
2910         isc_boolean_t check_next = ISC_TRUE;
2911         isc_boolean_t check_prev = ISC_TRUE;
2912         isc_boolean_t answer = ISC_FALSE;
2913         isc_result_t result;
2914         rdatasetheader_t *header;
2915         unsigned int n;
2916
2917         rbtdb = search->rbtdb;
2918
2919         dns_name_init(&name, NULL);
2920         dns_name_init(&tname, NULL);
2921         dns_name_init(&rname, NULL);
2922         dns_fixedname_init(&fnext);
2923         next = dns_fixedname_name(&fnext);
2924         dns_fixedname_init(&fprev);
2925         prev = dns_fixedname_name(&fprev);
2926         dns_fixedname_init(&forigin);
2927         origin = dns_fixedname_name(&forigin);
2928
2929         /*
2930          * Find if qname is at or below an empty node.
2931          * Use our own copy of the chain.
2932          */
2933
2934         chain = search->chain;
2935         do {
2936                 node = NULL;
2937                 result = dns_rbtnodechain_current(&chain, &name,
2938                                                   origin, &node);
2939                 if (result != ISC_R_SUCCESS)
2940                         break;
2941                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2942                           isc_rwlocktype_read);
2943                 for (header = node->data;
2944                      header != NULL;
2945                      header = header->next) {
2946                         if (header->serial <= search->serial &&
2947                             !IGNORE(header) && EXISTS(header))
2948                                 break;
2949                 }
2950                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2951                             isc_rwlocktype_read);
2952                 if (header != NULL)
2953                         break;
2954                 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
2955         } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
2956         if (result == ISC_R_SUCCESS)
2957                 result = dns_name_concatenate(&name, origin, prev, NULL);
2958         if (result != ISC_R_SUCCESS)
2959                 check_prev = ISC_FALSE;
2960
2961         result = dns_rbtnodechain_next(&chain, NULL, NULL);
2962         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2963                 node = NULL;
2964                 result = dns_rbtnodechain_current(&chain, &name,
2965                                                   origin, &node);
2966                 if (result != ISC_R_SUCCESS)
2967                         break;
2968                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2969                           isc_rwlocktype_read);
2970                 for (header = node->data;
2971                      header != NULL;
2972                      header = header->next) {
2973                         if (header->serial <= search->serial &&
2974                             !IGNORE(header) && EXISTS(header))
2975                                 break;
2976                 }
2977                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2978                             isc_rwlocktype_read);
2979                 if (header != NULL)
2980                         break;
2981                 result = dns_rbtnodechain_next(&chain, NULL, NULL);
2982         }
2983         if (result == ISC_R_SUCCESS)
2984                 result = dns_name_concatenate(&name, origin, next, NULL);
2985         if (result != ISC_R_SUCCESS)
2986                 check_next = ISC_FALSE;
2987
2988         dns_name_clone(qname, &rname);
2989
2990         /*
2991          * Remove the wildcard label to find the terminal name.
2992          */
2993         n = dns_name_countlabels(wname);
2994         dns_name_getlabelsequence(wname, 1, n - 1, &tname);
2995
2996         do {
2997                 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
2998                     (check_next && dns_name_issubdomain(next, &rname))) {
2999                         answer = ISC_TRUE;
3000                         break;
3001                 }
3002                 /*
3003                  * Remove the left hand label.
3004                  */
3005                 n = dns_name_countlabels(&rname);
3006                 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3007         } while (!dns_name_equal(&rname, &tname));
3008         return (answer);
3009 }
3010
3011 static inline isc_result_t
3012 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3013               dns_name_t *qname)
3014 {
3015         unsigned int i, j;
3016         dns_rbtnode_t *node, *level_node, *wnode;
3017         rdatasetheader_t *header;
3018         isc_result_t result = ISC_R_NOTFOUND;
3019         dns_name_t name;
3020         dns_name_t *wname;
3021         dns_fixedname_t fwname;
3022         dns_rbtdb_t *rbtdb;
3023         isc_boolean_t done, wild, active;
3024         dns_rbtnodechain_t wchain;
3025
3026         /*
3027          * Caller must be holding the tree lock and MUST NOT be holding
3028          * any node locks.
3029          */
3030
3031         /*
3032          * Examine each ancestor level.  If the level's wild bit
3033          * is set, then construct the corresponding wildcard name and
3034          * search for it.  If the wildcard node exists, and is active in
3035          * this version, we're done.  If not, then we next check to see
3036          * if the ancestor is active in this version.  If so, then there
3037          * can be no possible wildcard match and again we're done.  If not,
3038          * continue the search.
3039          */
3040
3041         rbtdb = search->rbtdb;
3042         i = search->chain.level_matches;
3043         done = ISC_FALSE;
3044         node = *nodep;
3045         do {
3046                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3047                           isc_rwlocktype_read);
3048
3049                 /*
3050                  * First we try to figure out if this node is active in
3051                  * the search's version.  We do this now, even though we
3052                  * may not need the information, because it simplifies the
3053                  * locking and code flow.
3054                  */
3055                 for (header = node->data;
3056                      header != NULL;
3057                      header = header->next) {
3058                         if (header->serial <= search->serial &&
3059                             !IGNORE(header) && EXISTS(header))
3060                                 break;
3061                 }
3062                 if (header != NULL)
3063                         active = ISC_TRUE;
3064                 else
3065                         active = ISC_FALSE;
3066
3067                 if (node->wild)
3068                         wild = ISC_TRUE;
3069                 else
3070                         wild = ISC_FALSE;
3071
3072                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3073                             isc_rwlocktype_read);
3074
3075                 if (wild) {
3076                         /*
3077                          * Construct the wildcard name for this level.
3078                          */
3079                         dns_name_init(&name, NULL);
3080                         dns_rbt_namefromnode(node, &name);
3081                         dns_fixedname_init(&fwname);
3082                         wname = dns_fixedname_name(&fwname);
3083                         result = dns_name_concatenate(dns_wildcardname, &name,
3084                                                       wname, NULL);
3085                         j = i;
3086                         while (result == ISC_R_SUCCESS && j != 0) {
3087                                 j--;
3088                                 level_node = search->chain.levels[j];
3089                                 dns_name_init(&name, NULL);
3090                                 dns_rbt_namefromnode(level_node, &name);
3091                                 result = dns_name_concatenate(wname,
3092                                                               &name,
3093                                                               wname,
3094                                                               NULL);
3095                         }
3096                         if (result != ISC_R_SUCCESS)
3097                                 break;
3098
3099                         wnode = NULL;
3100                         dns_rbtnodechain_init(&wchain, NULL);
3101                         result = dns_rbt_findnode(rbtdb->tree, wname,
3102                                                   NULL, &wnode, &wchain,
3103                                                   DNS_RBTFIND_EMPTYDATA,
3104                                                   NULL, NULL);
3105                         if (result == ISC_R_SUCCESS) {
3106                                 nodelock_t *lock;
3107
3108                                 /*
3109                                  * We have found the wildcard node.  If it
3110                                  * is active in the search's version, we're
3111                                  * done.
3112                                  */
3113                                 lock = &rbtdb->node_locks[wnode->locknum].lock;
3114                                 NODE_LOCK(lock, isc_rwlocktype_read);
3115                                 for (header = wnode->data;
3116                                      header != NULL;
3117                                      header = header->next) {
3118                                         if (header->serial <= search->serial &&
3119                                             !IGNORE(header) && EXISTS(header))
3120                                                 break;
3121                                 }
3122                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3123                                 if (header != NULL ||
3124                                     activeempty(search, &wchain, wname)) {
3125                                         if (activeemtpynode(search, qname,
3126                                                             wname)) {
3127                                                 return (ISC_R_NOTFOUND);
3128                                         }
3129                                         /*
3130                                          * The wildcard node is active!
3131                                          *
3132                                          * Note: result is still ISC_R_SUCCESS
3133                                          * so we don't have to set it.
3134                                          */
3135                                         *nodep = wnode;
3136                                         break;
3137                                 }
3138                         } else if (result != ISC_R_NOTFOUND &&
3139                                    result != DNS_R_PARTIALMATCH) {
3140                                 /*
3141                                  * An error has occurred.  Bail out.
3142                                  */
3143                                 break;
3144                         }
3145                 }
3146
3147                 if (active) {
3148                         /*
3149                          * The level node is active.  Any wildcarding
3150                          * present at higher levels has no
3151                          * effect and we're done.
3152                          */
3153                         result = ISC_R_NOTFOUND;
3154                         break;
3155                 }
3156
3157                 if (i > 0) {
3158                         i--;
3159                         node = search->chain.levels[i];
3160                 } else
3161                         done = ISC_TRUE;
3162         } while (!done);
3163
3164         return (result);
3165 }
3166
3167 static isc_boolean_t
3168 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3169 {
3170         dns_rdata_t rdata = DNS_RDATA_INIT;
3171         dns_rdata_nsec3_t nsec3;
3172         unsigned char *raw;                     /* RDATASLAB */
3173         unsigned int rdlen, count;
3174         isc_region_t region;
3175         isc_result_t result;
3176
3177         REQUIRE(header->type == dns_rdatatype_nsec3);
3178
3179         raw = (unsigned char *)header + sizeof(*header);
3180         count = raw[0] * 256 + raw[1]; /* count */
3181 #if DNS_RDATASET_FIXED
3182         raw += count * 4 + 2;
3183 #else
3184         raw += 2;
3185 #endif
3186         while (count-- > 0) {
3187                 rdlen = raw[0] * 256 + raw[1];
3188 #if DNS_RDATASET_FIXED
3189                 raw += 4;
3190 #else
3191                 raw += 2;
3192 #endif
3193                 region.base = raw;
3194                 region.length = rdlen;
3195                 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3196                                      dns_rdatatype_nsec3, &region);
3197                 raw += rdlen;
3198                 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3199                 INSIST(result == ISC_R_SUCCESS);
3200                 if (nsec3.hash == search->rbtversion->hash &&
3201                     nsec3.iterations == search->rbtversion->iterations &&
3202                     nsec3.salt_length == search->rbtversion->salt_length &&
3203                     memcmp(nsec3.salt, search->rbtversion->salt,
3204                            nsec3.salt_length) == 0)
3205                         return (ISC_TRUE);
3206                 dns_rdata_reset(&rdata);
3207         }
3208         return (ISC_FALSE);
3209 }
3210
3211 /*
3212  * Find node of the NSEC/NSEC3 record that is 'name'.
3213  */
3214 static inline isc_result_t
3215 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3216                   dns_name_t *foundname, dns_rdataset_t *rdataset,
3217                   dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3218                   dns_db_secure_t secure)
3219 {
3220         dns_rbtnode_t *node;
3221         rdatasetheader_t *header, *header_next, *found, *foundsig;
3222         isc_boolean_t empty_node;
3223         isc_result_t result;
3224         dns_fixedname_t fname, forigin;
3225         dns_name_t *name, *origin;
3226         dns_rdatatype_t type;
3227         rbtdb_rdatatype_t sigtype;
3228         isc_boolean_t wraps;
3229         isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3230
3231         if (tree == search->rbtdb->nsec3) {
3232                 type = dns_rdatatype_nsec3;
3233                 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3234                 wraps = ISC_TRUE;
3235         } else {
3236                 type = dns_rdatatype_nsec;
3237                 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3238                 wraps = ISC_FALSE;
3239         }
3240
3241  again:
3242         do {
3243                 node = NULL;
3244                 dns_fixedname_init(&fname);
3245                 name = dns_fixedname_name(&fname);
3246                 dns_fixedname_init(&forigin);
3247                 origin = dns_fixedname_name(&forigin);
3248                 result = dns_rbtnodechain_current(&search->chain, name,
3249                                                   origin, &node);
3250                 if (result != ISC_R_SUCCESS)
3251                         return (result);
3252                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3253                           isc_rwlocktype_read);
3254                 found = NULL;
3255                 foundsig = NULL;
3256                 empty_node = ISC_TRUE;
3257                 for (header = node->data;
3258                      header != NULL;
3259                      header = header_next) {
3260                         header_next = header->next;
3261                         /*
3262                          * Look for an active, extant NSEC or RRSIG NSEC.
3263                          */
3264                         do {
3265                                 if (header->serial <= search->serial &&
3266                                     !IGNORE(header)) {
3267                                         /*
3268                                          * Is this a "this rdataset doesn't
3269                                          * exist" record?
3270                                          */
3271                                         if (NONEXISTENT(header))
3272                                                 header = NULL;
3273                                         break;
3274                                 } else
3275                                         header = header->down;
3276                         } while (header != NULL);
3277                         if (header != NULL) {
3278                                 /*
3279                                  * We now know that there is at least one
3280                                  * active rdataset at this node.
3281                                  */
3282                                 empty_node = ISC_FALSE;
3283                                 if (header->type == type) {
3284                                         found = header;
3285                                         if (foundsig != NULL)
3286                                                 break;
3287                                 } else if (header->type == sigtype) {
3288                                         foundsig = header;
3289                                         if (found != NULL)
3290                                                 break;
3291                                 }
3292                         }
3293                 }
3294                 if (!empty_node) {
3295                         if (found != NULL && search->rbtversion->havensec3 &&
3296                             found->type == dns_rdatatype_nsec3 &&
3297                             !matchparams(found, search)) {
3298                                 empty_node = ISC_TRUE;
3299                                 found = NULL;
3300                                 foundsig = NULL;
3301                                 result = dns_rbtnodechain_prev(&search->chain,
3302                                                                NULL, NULL);
3303                         } else if (found != NULL &&
3304                                    (foundsig != NULL || !need_sig))
3305                         {
3306                                 /*
3307                                  * We've found the right NSEC/NSEC3 record.
3308                                  *
3309                                  * Note: for this to really be the right
3310                                  * NSEC record, it's essential that the NSEC
3311                                  * records of any nodes obscured by a zone
3312                                  * cut have been removed; we assume this is
3313                                  * the case.
3314                                  */
3315                                 result = dns_name_concatenate(name, origin,
3316                                                               foundname, NULL);
3317                                 if (result == ISC_R_SUCCESS) {
3318                                         if (nodep != NULL) {
3319                                                 new_reference(search->rbtdb,
3320                                                               node);
3321                                                 *nodep = node;
3322                                         }
3323                                         bind_rdataset(search->rbtdb, node,
3324                                                       found, search->now,
3325                                                       rdataset);
3326                                         if (foundsig != NULL)
3327                                                 bind_rdataset(search->rbtdb,
3328                                                               node,
3329                                                               foundsig,
3330                                                               search->now,
3331                                                               sigrdataset);
3332                                 }
3333                         } else if (found == NULL && foundsig == NULL) {
3334                                 /*
3335                                  * This node is active, but has no NSEC or
3336                                  * RRSIG NSEC.  That means it's glue or
3337                                  * other obscured zone data that isn't
3338                                  * relevant for our search.  Treat the
3339                                  * node as if it were empty and keep looking.
3340                                  */
3341                                 empty_node = ISC_TRUE;
3342                                 result = dns_rbtnodechain_prev(&search->chain,
3343                                                                NULL, NULL);
3344                         } else {
3345                                 /*
3346                                  * We found an active node, but either the
3347                                  * NSEC or the RRSIG NSEC is missing.  This
3348                                  * shouldn't happen.
3349                                  */
3350                                 result = DNS_R_BADDB;
3351                         }
3352                 } else {
3353                         /*
3354                          * This node isn't active.  We've got to keep
3355                          * looking.
3356                          */
3357                         result = dns_rbtnodechain_prev(&search->chain, NULL,
3358                                                        NULL);
3359                 }
3360                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3361                             isc_rwlocktype_read);
3362         } while (empty_node && result == ISC_R_SUCCESS);
3363
3364         if (result == ISC_R_NOMORE && wraps) {
3365                 result = dns_rbtnodechain_last(&search->chain, tree,
3366                                                NULL, NULL);
3367                 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3368                         wraps = ISC_FALSE;
3369                         goto again;
3370                 }
3371         }
3372
3373         /*
3374          * If the result is ISC_R_NOMORE, then we got to the beginning of
3375          * the database and didn't find a NSEC record.  This shouldn't
3376          * happen.
3377          */
3378         if (result == ISC_R_NOMORE)
3379                 result = DNS_R_BADDB;
3380
3381         return (result);
3382 }
3383
3384 static isc_result_t
3385 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3386           dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3387           dns_dbnode_t **nodep, dns_name_t *foundname,
3388           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3389 {
3390         dns_rbtnode_t *node = NULL;
3391         isc_result_t result;
3392         rbtdb_search_t search;
3393         isc_boolean_t cname_ok = ISC_TRUE;
3394         isc_boolean_t close_version = ISC_FALSE;
3395         isc_boolean_t maybe_zonecut = ISC_FALSE;
3396         isc_boolean_t at_zonecut = ISC_FALSE;
3397         isc_boolean_t wild;
3398         isc_boolean_t empty_node;
3399         rdatasetheader_t *header, *header_next, *found, *nsecheader;
3400         rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3401         rbtdb_rdatatype_t sigtype;
3402         isc_boolean_t active;
3403         dns_rbtnodechain_t chain;
3404         nodelock_t *lock;
3405         dns_rbt_t *tree;
3406
3407         search.rbtdb = (dns_rbtdb_t *)db;
3408
3409         REQUIRE(VALID_RBTDB(search.rbtdb));
3410
3411         /*
3412          * We don't care about 'now'.
3413          */
3414         UNUSED(now);
3415
3416         /*
3417          * If the caller didn't supply a version, attach to the current
3418          * version.
3419          */
3420         if (version == NULL) {
3421                 currentversion(db, &version);
3422                 close_version = ISC_TRUE;
3423         }
3424
3425         search.rbtversion = version;
3426         search.serial = search.rbtversion->serial;
3427         search.options = options;
3428         search.copy_name = ISC_FALSE;
3429         search.need_cleanup = ISC_FALSE;
3430         search.wild = ISC_FALSE;
3431         search.zonecut = NULL;
3432         dns_fixedname_init(&search.zonecut_name);
3433         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3434         search.now = 0;
3435
3436         /*
3437          * 'wild' will be true iff. we've matched a wildcard.
3438          */
3439         wild = ISC_FALSE;
3440
3441         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3442
3443         /*
3444          * Search down from the root of the tree.  If, while going down, we
3445          * encounter a callback node, zone_zonecut_callback() will search the
3446          * rdatasets at the zone cut for active DNAME or NS rdatasets.
3447          */
3448         tree =  (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3449                                                          search.rbtdb->tree;
3450         result = dns_rbt_findnode(tree, name, foundname, &node,
3451                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
3452                                   zone_zonecut_callback, &search);
3453
3454         if (result == DNS_R_PARTIALMATCH) {
3455         partial_match:
3456                 if (search.zonecut != NULL) {
3457                     result = setup_delegation(&search, nodep, foundname,
3458                                               rdataset, sigrdataset);
3459                     goto tree_exit;
3460                 }
3461
3462                 if (search.wild) {
3463                         /*
3464                          * At least one of the levels in the search chain
3465                          * potentially has a wildcard.  For each such level,
3466                          * we must see if there's a matching wildcard active
3467                          * in the current version.
3468                          */
3469                         result = find_wildcard(&search, &node, name);
3470                         if (result == ISC_R_SUCCESS) {
3471                                 result = dns_name_copy(name, foundname, NULL);
3472                                 if (result != ISC_R_SUCCESS)
3473                                         goto tree_exit;
3474                                 wild = ISC_TRUE;
3475                                 goto found;
3476                         }
3477                         else if (result != ISC_R_NOTFOUND)
3478                                 goto tree_exit;
3479                 }
3480
3481                 chain = search.chain;
3482                 active = activeempty(&search, &chain, name);
3483
3484                 /*
3485                  * If we're here, then the name does not exist, is not
3486                  * beneath a zonecut, and there's no matching wildcard.
3487                  */
3488                 if ((search.rbtversion->secure == dns_db_secure &&
3489                      !search.rbtversion->havensec3) ||
3490                     (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3491                     (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3492                 {
3493                         result = find_closest_nsec(&search, nodep, foundname,
3494                                                    rdataset, sigrdataset, tree,
3495                                                    search.rbtversion->secure);
3496                         if (result == ISC_R_SUCCESS)
3497                                 result = active ? DNS_R_EMPTYNAME :
3498                                                   DNS_R_NXDOMAIN;
3499                 } else
3500                         result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3501                 goto tree_exit;
3502         } else if (result != ISC_R_SUCCESS)
3503                 goto tree_exit;
3504
3505  found:
3506         /*
3507          * We have found a node whose name is the desired name, or we
3508          * have matched a wildcard.
3509          */
3510
3511         if (search.zonecut != NULL) {
3512                 /*
3513                  * If we're beneath a zone cut, we don't want to look for
3514                  * CNAMEs because they're not legitimate zone glue.
3515                  */
3516                 cname_ok = ISC_FALSE;
3517         } else {
3518                 /*
3519                  * The node may be a zone cut itself.  If it might be one,
3520                  * make sure we check for it later.
3521                  *
3522                  * DS records live above the zone cut in ordinary zone so
3523                  * we want to ignore any referral.
3524                  *
3525                  * Stub zones don't have anything "above" the delegation so
3526                  * we always return a referral.
3527                  */
3528                 if (node->find_callback &&
3529                     ((node != search.rbtdb->origin_node &&
3530                       !dns_rdatatype_atparent(type)) ||
3531                      IS_STUB(search.rbtdb)))
3532                         maybe_zonecut = ISC_TRUE;
3533         }
3534
3535         /*
3536          * Certain DNSSEC types are not subject to CNAME matching
3537          * (RFC4035, section 2.5 and RFC3007).
3538          *
3539          * We don't check for RRSIG, because we don't store RRSIG records
3540          * directly.
3541          */
3542         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3543                 cname_ok = ISC_FALSE;
3544
3545         /*
3546          * We now go looking for rdata...
3547          */
3548
3549         lock = &search.rbtdb->node_locks[node->locknum].lock;
3550         NODE_LOCK(lock, isc_rwlocktype_read);
3551
3552         found = NULL;
3553         foundsig = NULL;
3554         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3555         nsecheader = NULL;
3556         nsecsig = NULL;
3557         cnamesig = NULL;
3558         empty_node = ISC_TRUE;
3559         for (header = node->data; header != NULL; header = header_next) {
3560                 header_next = header->next;
3561                 /*
3562                  * Look for an active, extant rdataset.
3563                  */
3564                 do {
3565                         if (header->serial <= search.serial &&
3566                             !IGNORE(header)) {
3567                                 /*
3568                                  * Is this a "this rdataset doesn't
3569                                  * exist" record?
3570                                  */
3571                                 if (NONEXISTENT(header))
3572                                         header = NULL;
3573                                 break;
3574                         } else
3575                                 header = header->down;
3576                 } while (header != NULL);
3577                 if (header != NULL) {
3578                         /*
3579                          * We now know that there is at least one active
3580                          * rdataset at this node.
3581                          */
3582                         empty_node = ISC_FALSE;
3583
3584                         /*
3585                          * Do special zone cut handling, if requested.
3586                          */
3587                         if (maybe_zonecut &&
3588                             header->type == dns_rdatatype_ns) {
3589                                 /*
3590                                  * We increment the reference count on node to
3591                                  * ensure that search->zonecut_rdataset will
3592                                  * still be valid later.
3593                                  */
3594                                 new_reference(search.rbtdb, node);
3595                                 search.zonecut = node;
3596                                 search.zonecut_rdataset = header;
3597                                 search.zonecut_sigrdataset = NULL;
3598                                 search.need_cleanup = ISC_TRUE;
3599                                 maybe_zonecut = ISC_FALSE;
3600                                 at_zonecut = ISC_TRUE;
3601                                 /*
3602                                  * It is not clear if KEY should still be
3603                                  * allowed at the parent side of the zone
3604                                  * cut or not.  It is needed for RFC3007
3605                                  * validated updates.
3606                                  */
3607                                 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3608                                     && type != dns_rdatatype_nsec
3609                                     && type != dns_rdatatype_key) {
3610                                         /*
3611                                          * Glue is not OK, but any answer we
3612                                          * could return would be glue.  Return
3613                                          * the delegation.
3614                                          */
3615                                         found = NULL;
3616                                         break;
3617                                 }
3618                                 if (found != NULL && foundsig != NULL)
3619                                         break;
3620                         }
3621
3622
3623                         /*
3624                          * If the NSEC3 record doesn't match the chain
3625                          * we are using behave as if it isn't here.
3626                          */
3627                         if (header->type == dns_rdatatype_nsec3 &&
3628                            !matchparams(header, &search)) {
3629                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3630                                 goto partial_match;
3631                         }
3632                         /*
3633                          * If we found a type we were looking for,
3634                          * remember it.
3635                          */
3636                         if (header->type == type ||
3637                             type == dns_rdatatype_any ||
3638                             (header->type == dns_rdatatype_cname &&
3639                              cname_ok)) {
3640                                 /*
3641                                  * We've found the answer!
3642                                  */
3643                                 found = header;
3644                                 if (header->type == dns_rdatatype_cname &&
3645                                     cname_ok) {
3646                                         /*
3647                                          * We may be finding a CNAME instead
3648                                          * of the desired type.
3649                                          *
3650                                          * If we've already got the CNAME RRSIG,
3651                                          * use it, otherwise change sigtype
3652                                          * so that we find it.
3653                                          */
3654                                         if (cnamesig != NULL)
3655                                                 foundsig = cnamesig;
3656                                         else
3657                                                 sigtype =
3658                                                     RBTDB_RDATATYPE_SIGCNAME;
3659                                 }
3660                                 /*
3661                                  * If we've got all we need, end the search.
3662                                  */
3663                                 if (!maybe_zonecut && foundsig != NULL)
3664                                         break;
3665                         } else if (header->type == sigtype) {
3666                                 /*
3667                                  * We've found the RRSIG rdataset for our
3668                                  * target type.  Remember it.
3669                                  */
3670                                 foundsig = header;
3671                                 /*
3672                                  * If we've got all we need, end the search.
3673                                  */
3674                                 if (!maybe_zonecut && found != NULL)
3675                                         break;
3676                         } else if (header->type == dns_rdatatype_nsec &&
3677                                    !search.rbtversion->havensec3) {
3678                                 /*
3679                                  * Remember a NSEC rdataset even if we're
3680                                  * not specifically looking for it, because
3681                                  * we might need it later.
3682                                  */
3683                                 nsecheader = header;
3684                         } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3685                                    !search.rbtversion->havensec3) {
3686                                 /*
3687                                  * If we need the NSEC rdataset, we'll also
3688                                  * need its signature.
3689                                  */
3690                                 nsecsig = header;
3691                         } else if (cname_ok &&
3692                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
3693                                 /*
3694                                  * If we get a CNAME match, we'll also need
3695                                  * its signature.
3696                                  */
3697                                 cnamesig = header;
3698                         }
3699                 }
3700         }
3701
3702         if (empty_node) {
3703                 /*
3704                  * We have an exact match for the name, but there are no
3705                  * active rdatasets in the desired version.  That means that
3706                  * this node doesn't exist in the desired version, and that
3707                  * we really have a partial match.
3708                  */
3709                 if (!wild) {
3710                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3711                         goto partial_match;
3712                 }
3713         }
3714
3715         /*
3716          * If we didn't find what we were looking for...
3717          */
3718         if (found == NULL) {
3719                 if (search.zonecut != NULL) {
3720                         /*
3721                          * We were trying to find glue at a node beneath a
3722                          * zone cut, but didn't.
3723                          *
3724                          * Return the delegation.
3725                          */
3726                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3727                         result = setup_delegation(&search, nodep, foundname,
3728                                                   rdataset, sigrdataset);
3729                         goto tree_exit;
3730                 }
3731                 /*
3732                  * The desired type doesn't exist.
3733                  */
3734                 result = DNS_R_NXRRSET;
3735                 if (search.rbtversion->secure == dns_db_secure &&
3736                     !search.rbtversion->havensec3 &&
3737                     (nsecheader == NULL || nsecsig == NULL)) {
3738                         /*
3739                          * The zone is secure but there's no NSEC,
3740                          * or the NSEC has no signature!
3741                          */
3742                         if (!wild) {
3743                                 result = DNS_R_BADDB;
3744                                 goto node_exit;
3745                         }
3746
3747                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3748                         result = find_closest_nsec(&search, nodep, foundname,
3749                                                    rdataset, sigrdataset,
3750                                                    search.rbtdb->tree,
3751                                                    search.rbtversion->secure);
3752                         if (result == ISC_R_SUCCESS)
3753                                 result = DNS_R_EMPTYWILD;
3754                         goto tree_exit;
3755                 }
3756                 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3757                     nsecheader == NULL)
3758                 {
3759                         /*
3760                          * There's no NSEC record, and we were told
3761                          * to find one.
3762                          */
3763                         result = DNS_R_BADDB;
3764                         goto node_exit;
3765                 }
3766                 if (nodep != NULL) {
3767                         new_reference(search.rbtdb, node);
3768                         *nodep = node;
3769                 }
3770                 if ((search.rbtversion->secure == dns_db_secure &&
3771                      !search.rbtversion->havensec3) ||
3772                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
3773                 {
3774                         bind_rdataset(search.rbtdb, node, nsecheader,
3775                                       0, rdataset);
3776                         if (nsecsig != NULL)
3777                                 bind_rdataset(search.rbtdb, node,
3778                                               nsecsig, 0, sigrdataset);
3779                 }
3780                 if (wild)
3781                         foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3782                 goto node_exit;
3783         }
3784
3785         /*
3786          * We found what we were looking for, or we found a CNAME.
3787          */
3788
3789         if (type != found->type &&
3790             type != dns_rdatatype_any &&
3791             found->type == dns_rdatatype_cname) {
3792                 /*
3793                  * We weren't doing an ANY query and we found a CNAME instead
3794                  * of the type we were looking for, so we need to indicate
3795                  * that result to the caller.
3796                  */
3797                 result = DNS_R_CNAME;
3798         } else if (search.zonecut != NULL) {
3799                 /*
3800                  * If we're beneath a zone cut, we must indicate that the
3801                  * result is glue, unless we're actually at the zone cut
3802                  * and the type is NSEC or KEY.
3803                  */
3804                 if (search.zonecut == node) {
3805                         /*
3806                          * It is not clear if KEY should still be
3807                          * allowed at the parent side of the zone
3808                          * cut or not.  It is needed for RFC3007
3809                          * validated updates.
3810                          */
3811                         if (type == dns_rdatatype_nsec ||
3812                             type == dns_rdatatype_nsec3 ||
3813                             type == dns_rdatatype_key)
3814                                 result = ISC_R_SUCCESS;
3815                         else if (type == dns_rdatatype_any)
3816                                 result = DNS_R_ZONECUT;
3817                         else
3818                                 result = DNS_R_GLUE;
3819                 } else
3820                         result = DNS_R_GLUE;
3821                 /*
3822                  * We might have found data that isn't glue, but was occluded
3823                  * by a dynamic update.  If the caller cares about this, they
3824                  * will have told us to validate glue.
3825                  *
3826                  * XXX We should cache the glue validity state!
3827                  */
3828                 if (result == DNS_R_GLUE &&
3829                     (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
3830                     !valid_glue(&search, foundname, type, node)) {
3831                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3832                         result = setup_delegation(&search, nodep, foundname,
3833                                                   rdataset, sigrdataset);
3834                     goto tree_exit;
3835                 }
3836         } else {
3837                 /*
3838                  * An ordinary successful query!
3839                  */
3840                 result = ISC_R_SUCCESS;
3841         }
3842
3843         if (nodep != NULL) {
3844                 if (!at_zonecut)
3845                         new_reference(search.rbtdb, node);
3846                 else
3847                         search.need_cleanup = ISC_FALSE;
3848                 *nodep = node;
3849         }
3850
3851         if (type != dns_rdatatype_any) {
3852                 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
3853                 if (foundsig != NULL)
3854                         bind_rdataset(search.rbtdb, node, foundsig, 0,
3855                                       sigrdataset);
3856         }
3857
3858         if (wild)
3859                 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3860
3861  node_exit:
3862         NODE_UNLOCK(lock, isc_rwlocktype_read);
3863
3864  tree_exit:
3865         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3866
3867         /*
3868          * If we found a zonecut but aren't going to use it, we have to
3869          * let go of it.
3870          */
3871         if (search.need_cleanup) {
3872                 node = search.zonecut;
3873                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3874
3875                 NODE_LOCK(lock, isc_rwlocktype_read);
3876                 decrement_reference(search.rbtdb, node, 0,
3877                                     isc_rwlocktype_read, isc_rwlocktype_none,
3878                                     ISC_FALSE);
3879                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3880         }
3881
3882         if (close_version)
3883                 closeversion(db, &version, ISC_FALSE);
3884
3885         dns_rbtnodechain_reset(&search.chain);
3886
3887         return (result);
3888 }
3889
3890 static isc_result_t
3891 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
3892                  isc_stdtime_t now, dns_dbnode_t **nodep,
3893                  dns_name_t *foundname,
3894                  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3895 {
3896         UNUSED(db);
3897         UNUSED(name);
3898         UNUSED(options);
3899         UNUSED(now);
3900         UNUSED(nodep);
3901         UNUSED(foundname);
3902         UNUSED(rdataset);
3903         UNUSED(sigrdataset);
3904
3905         FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
3906
3907         return (ISC_R_NOTIMPLEMENTED);
3908 }
3909
3910 static isc_result_t
3911 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
3912         rbtdb_search_t *search = arg;
3913         rdatasetheader_t *header, *header_prev, *header_next;
3914         rdatasetheader_t *dname_header, *sigdname_header;
3915         isc_result_t result;
3916         nodelock_t *lock;
3917         isc_rwlocktype_t locktype;
3918
3919         /* XXX comment */
3920
3921         REQUIRE(search->zonecut == NULL);
3922
3923         /*
3924          * Keep compiler silent.
3925          */
3926         UNUSED(name);
3927
3928         lock = &(search->rbtdb->node_locks[node->locknum].lock);
3929         locktype = isc_rwlocktype_read;
3930         NODE_LOCK(lock, locktype);
3931
3932         /*
3933          * Look for a DNAME or RRSIG DNAME rdataset.
3934          */
3935         dname_header = NULL;
3936         sigdname_header = NULL;
3937         header_prev = NULL;
3938         for (header = node->data; header != NULL; header = header_next) {
3939                 header_next = header->next;
3940                 if (header->rdh_ttl <= search->now) {
3941                         /*
3942                          * This rdataset is stale.  If no one else is
3943                          * using the node, we can clean it up right
3944                          * now, otherwise we mark it as stale, and
3945                          * the node as dirty, so it will get cleaned
3946                          * up later.
3947                          */
3948                         if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
3949                             (locktype == isc_rwlocktype_write ||
3950                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3951                                 /*
3952                                  * We update the node's status only when we
3953                                  * can get write access; otherwise, we leave
3954                                  * others to this work.  Periodical cleaning
3955                                  * will eventually take the job as the last
3956                                  * resort.
3957                                  * We won't downgrade the lock, since other
3958                                  * rdatasets are probably stale, too.
3959                                  */
3960                                 locktype = isc_rwlocktype_write;
3961
3962                                 if (dns_rbtnode_refcurrent(node) == 0) {
3963                                         isc_mem_t *mctx;
3964
3965                                         /*
3966                                          * header->down can be non-NULL if the
3967                                          * refcount has just decremented to 0
3968                                          * but decrement_reference() has not
3969                                          * performed clean_cache_node(), in
3970                                          * which case we need to purge the
3971                                          * stale headers first.
3972                                          */
3973                                         mctx = search->rbtdb->common.mctx;
3974                                         clean_stale_headers(search->rbtdb,
3975                                                             mctx,
3976                                                             header);
3977                                         if (header_prev != NULL)
3978                                                 header_prev->next =
3979                                                         header->next;
3980                                         else
3981                                                 node->data = header->next;
3982                                         free_rdataset(search->rbtdb, mctx,
3983                                                       header);
3984                                 } else {
3985                                         header->attributes |=
3986                                                 RDATASET_ATTR_STALE;
3987                                         node->dirty = 1;
3988                                         header_prev = header;
3989                                 }
3990                         } else
3991                                 header_prev = header;
3992                 } else if (header->type == dns_rdatatype_dname &&
3993                            EXISTS(header)) {
3994                         dname_header = header;
3995                         header_prev = header;
3996                 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
3997                          EXISTS(header)) {
3998                         sigdname_header = header;
3999                         header_prev = header;
4000                 } else
4001                         header_prev = header;
4002         }
4003
4004         if (dname_header != NULL &&
4005             (!DNS_TRUST_PENDING(dname_header->trust) ||
4006              (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4007                 /*
4008                  * We increment the reference count on node to ensure that
4009                  * search->zonecut_rdataset will still be valid later.
4010                  */
4011                 new_reference(search->rbtdb, node);
4012                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4013                 search->zonecut = node;
4014                 search->zonecut_rdataset = dname_header;
4015                 search->zonecut_sigrdataset = sigdname_header;
4016                 search->need_cleanup = ISC_TRUE;
4017                 result = DNS_R_PARTIALMATCH;
4018         } else
4019                 result = DNS_R_CONTINUE;
4020
4021         NODE_UNLOCK(lock, locktype);
4022
4023         return (result);
4024 }
4025
4026 static inline isc_result_t
4027 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4028                      dns_dbnode_t **nodep, dns_name_t *foundname,
4029                      dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4030 {
4031         unsigned int i;
4032         dns_rbtnode_t *level_node;
4033         rdatasetheader_t *header, *header_prev, *header_next;
4034         rdatasetheader_t *found, *foundsig;
4035         isc_result_t result = ISC_R_NOTFOUND;
4036         dns_name_t name;
4037         dns_rbtdb_t *rbtdb;
4038         isc_boolean_t done;
4039         nodelock_t *lock;
4040         isc_rwlocktype_t locktype;
4041
4042         /*
4043          * Caller must be holding the tree lock.
4044          */
4045
4046         rbtdb = search->rbtdb;
4047         i = search->chain.level_matches;
4048         done = ISC_FALSE;
4049         do {
4050                 locktype = isc_rwlocktype_read;
4051                 lock = &rbtdb->node_locks[node->locknum].lock;
4052                 NODE_LOCK(lock, locktype);
4053
4054                 /*
4055                  * Look for NS and RRSIG NS rdatasets.
4056                  */
4057                 found = NULL;
4058                 foundsig = NULL;
4059                 header_prev = NULL;
4060                 for (header = node->data;
4061                      header != NULL;
4062                      header = header_next) {
4063                         header_next = header->next;
4064                         if (header->rdh_ttl <= search->now) {
4065                                 /*
4066                                  * This rdataset is stale.  If no one else is
4067                                  * using the node, we can clean it up right
4068                                  * now, otherwise we mark it as stale, and
4069                                  * the node as dirty, so it will get cleaned
4070                                  * up later.
4071                                  */
4072                                 if ((header->rdh_ttl <= search->now -
4073                                                     RBTDB_VIRTUAL) &&
4074                                     (locktype == isc_rwlocktype_write ||
4075                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4076                                         /*
4077                                          * We update the node's status only
4078                                          * when we can get write access.
4079                                          */
4080                                         locktype = isc_rwlocktype_write;
4081
4082                                         if (dns_rbtnode_refcurrent(node)
4083                                             == 0) {
4084                                                 isc_mem_t *m;
4085
4086                                                 m = search->rbtdb->common.mctx;
4087                                                 clean_stale_headers(
4088                                                         search->rbtdb,
4089                                                         m, header);
4090                                                 if (header_prev != NULL)
4091                                                         header_prev->next =
4092                                                                 header->next;
4093                                                 else
4094                                                         node->data =
4095                                                                 header->next;
4096                                                 free_rdataset(rbtdb, m,
4097                                                               header);
4098                                         } else {
4099                                                 header->attributes |=
4100                                                         RDATASET_ATTR_STALE;
4101                                                 node->dirty = 1;
4102                                                 header_prev = header;
4103                                         }
4104                                 } else
4105                                         header_prev = header;
4106                         } else if (EXISTS(header)) {
4107                                 /*
4108                                  * We've found an extant rdataset.  See if
4109                                  * we're interested in it.
4110                                  */
4111                                 if (header->type == dns_rdatatype_ns) {
4112                                         found = header;
4113                                         if (foundsig != NULL)
4114                                                 break;
4115                                 } else if (header->type ==
4116                                            RBTDB_RDATATYPE_SIGNS) {
4117                                         foundsig = header;
4118                                         if (found != NULL)
4119                                                 break;
4120                                 }
4121                                 header_prev = header;
4122                         } else
4123                                 header_prev = header;
4124                 }
4125
4126                 if (found != NULL) {
4127                         /*
4128                          * If we have to set foundname, we do it before
4129                          * anything else.  If we were to set foundname after
4130                          * we had set nodep or bound the rdataset, then we'd
4131                          * have to undo that work if dns_name_concatenate()
4132                          * failed.  By setting foundname first, there's
4133                          * nothing to undo if we have trouble.
4134                          */
4135                         if (foundname != NULL) {
4136                                 dns_name_init(&name, NULL);
4137                                 dns_rbt_namefromnode(node, &name);
4138                                 result = dns_name_copy(&name, foundname, NULL);
4139                                 while (result == ISC_R_SUCCESS && i > 0) {
4140                                         i--;
4141                                         level_node = search->chain.levels[i];
4142                                         dns_name_init(&name, NULL);
4143                                         dns_rbt_namefromnode(level_node,
4144                                                              &name);
4145                                         result =
4146                                                 dns_name_concatenate(foundname,
4147                                                                      &name,
4148                                                                      foundname,
4149                                                                      NULL);
4150                                 }
4151                                 if (result != ISC_R_SUCCESS) {
4152                                         *nodep = NULL;
4153                                         goto node_exit;
4154                                 }
4155                         }
4156                         result = DNS_R_DELEGATION;
4157                         if (nodep != NULL) {
4158                                 new_reference(search->rbtdb, node);
4159                                 *nodep = node;
4160                         }
4161                         bind_rdataset(search->rbtdb, node, found, search->now,
4162                                       rdataset);
4163                         if (foundsig != NULL)
4164                                 bind_rdataset(search->rbtdb, node, foundsig,
4165                                               search->now, sigrdataset);
4166                         if (need_headerupdate(found, search->now) ||
4167                             (foundsig != NULL &&
4168                              need_headerupdate(foundsig, search->now))) {
4169                                 if (locktype != isc_rwlocktype_write) {
4170                                         NODE_UNLOCK(lock, locktype);
4171                                         NODE_LOCK(lock, isc_rwlocktype_write);
4172                                         locktype = isc_rwlocktype_write;
4173                                 }
4174                                 if (need_headerupdate(found, search->now))
4175                                         update_header(search->rbtdb, found,
4176                                                       search->now);
4177                                 if (foundsig != NULL &&
4178                                     need_headerupdate(foundsig, search->now)) {
4179                                         update_header(search->rbtdb, foundsig,
4180                                                       search->now);
4181                                 }
4182                         }
4183                 }
4184
4185         node_exit:
4186                 NODE_UNLOCK(lock, locktype);
4187
4188                 if (found == NULL && i > 0) {
4189                         i--;
4190                         node = search->chain.levels[i];
4191                 } else
4192                         done = ISC_TRUE;
4193
4194         } while (!done);
4195
4196         return (result);
4197 }
4198
4199 static isc_result_t
4200 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4201                   isc_stdtime_t now, dns_name_t *foundname,
4202                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4203 {
4204         dns_rbtnode_t *node;
4205         rdatasetheader_t *header, *header_next, *header_prev;
4206         rdatasetheader_t *found, *foundsig;
4207         isc_boolean_t empty_node;
4208         isc_result_t result;
4209         dns_fixedname_t fname, forigin;
4210         dns_name_t *name, *origin;
4211         rbtdb_rdatatype_t matchtype, sigmatchtype;
4212         nodelock_t *lock;
4213         isc_rwlocktype_t locktype;
4214
4215         matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4216         sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4217                                              dns_rdatatype_nsec);
4218
4219         do {
4220                 node = NULL;
4221                 dns_fixedname_init(&fname);
4222                 name = dns_fixedname_name(&fname);
4223                 dns_fixedname_init(&forigin);
4224                 origin = dns_fixedname_name(&forigin);
4225                 result = dns_rbtnodechain_current(&search->chain, name,
4226                                                   origin, &node);
4227                 if (result != ISC_R_SUCCESS)
4228                         return (result);
4229                 locktype = isc_rwlocktype_read;
4230                 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4231                 NODE_LOCK(lock, locktype);
4232                 found = NULL;
4233                 foundsig = NULL;
4234                 empty_node = ISC_TRUE;
4235                 header_prev = NULL;
4236                 for (header = node->data;
4237                      header != NULL;
4238                      header = header_next) {
4239                         header_next = header->next;
4240                         if (header->rdh_ttl <= now) {
4241                                 /*
4242                                  * This rdataset is stale.  If no one else is
4243                                  * using the node, we can clean it up right
4244                                  * now, otherwise we mark it as stale, and the
4245                                  * node as dirty, so it will get cleaned up
4246                                  * later.
4247                                  */
4248                                 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4249                                     (locktype == isc_rwlocktype_write ||
4250                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4251                                         /*
4252                                          * We update the node's status only
4253                                          * when we can get write access.
4254                                          */
4255                                         locktype = isc_rwlocktype_write;
4256
4257                                         if (dns_rbtnode_refcurrent(node)
4258                                             == 0) {
4259                                                 isc_mem_t *m;
4260
4261                                                 m = search->rbtdb->common.mctx;
4262                                                 clean_stale_headers(
4263                                                         search->rbtdb,
4264                                                         m, header);
4265                                                 if (header_prev != NULL)
4266                                                         header_prev->next =
4267                                                                 header->next;
4268                                                 else
4269                                                         node->data = header->next;
4270                                                 free_rdataset(search->rbtdb, m,
4271                                                               header);
4272                                         } else {
4273                                                 header->attributes |=
4274                                                         RDATASET_ATTR_STALE;
4275                                                 node->dirty = 1;
4276                                                 header_prev = header;
4277                                         }
4278                                 } else
4279                                         header_prev = header;
4280                                 continue;
4281                         }
4282                         if (NONEXISTENT(header) ||
4283                             RBTDB_RDATATYPE_BASE(header->type) == 0) {
4284                                 header_prev = header;
4285                                 continue;
4286                         }
4287                         empty_node = ISC_FALSE;
4288                         if (header->type == matchtype)
4289                                 found = header;
4290                         else if (header->type == sigmatchtype)
4291                                 foundsig = header;
4292                         header_prev = header;
4293                 }
4294                 if (found != NULL) {
4295                         result = dns_name_concatenate(name, origin,
4296                                                       foundname, NULL);
4297                         if (result != ISC_R_SUCCESS)
4298                                 goto unlock_node;
4299                         bind_rdataset(search->rbtdb, node, found,
4300                                       now, rdataset);
4301                         if (foundsig != NULL)
4302                                 bind_rdataset(search->rbtdb, node, foundsig,
4303                                               now, sigrdataset);
4304                         new_reference(search->rbtdb, node);
4305                         *nodep = node;
4306                         result = DNS_R_COVERINGNSEC;
4307                 } else if (!empty_node) {
4308                         result = ISC_R_NOTFOUND;
4309                 } else
4310                         result = dns_rbtnodechain_prev(&search->chain, NULL,
4311                                                        NULL);
4312  unlock_node:
4313                 NODE_UNLOCK(lock, locktype);
4314         } while (empty_node && result == ISC_R_SUCCESS);
4315         return (result);
4316 }
4317
4318 static isc_result_t
4319 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4320            dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4321            dns_dbnode_t **nodep, dns_name_t *foundname,
4322            dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4323 {
4324         dns_rbtnode_t *node = NULL;
4325         isc_result_t result;
4326         rbtdb_search_t search;
4327         isc_boolean_t cname_ok = ISC_TRUE;
4328         isc_boolean_t empty_node;
4329         nodelock_t *lock;
4330         isc_rwlocktype_t locktype;
4331         rdatasetheader_t *header, *header_prev, *header_next;
4332         rdatasetheader_t *found, *nsheader;
4333         rdatasetheader_t *foundsig, *nssig, *cnamesig;
4334         rdatasetheader_t *update, *updatesig;
4335         rbtdb_rdatatype_t sigtype, negtype;
4336
4337         UNUSED(version);
4338
4339         search.rbtdb = (dns_rbtdb_t *)db;
4340
4341         REQUIRE(VALID_RBTDB(search.rbtdb));
4342         REQUIRE(version == NULL);
4343
4344         if (now == 0)
4345                 isc_stdtime_get(&now);
4346
4347         search.rbtversion = NULL;
4348         search.serial = 1;
4349         search.options = options;
4350         search.copy_name = ISC_FALSE;
4351         search.need_cleanup = ISC_FALSE;
4352         search.wild = ISC_FALSE;
4353         search.zonecut = NULL;
4354         dns_fixedname_init(&search.zonecut_name);
4355         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4356         search.now = now;
4357         update = NULL;
4358         updatesig = NULL;
4359
4360         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4361
4362         /*
4363          * Search down from the root of the tree.  If, while going down, we
4364          * encounter a callback node, cache_zonecut_callback() will search the
4365          * rdatasets at the zone cut for a DNAME rdataset.
4366          */
4367         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4368                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
4369                                   cache_zonecut_callback, &search);
4370
4371         if (result == DNS_R_PARTIALMATCH) {
4372                 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4373                         result = find_coveringnsec(&search, nodep, now,
4374                                                    foundname, rdataset,
4375                                                    sigrdataset);
4376                         if (result == DNS_R_COVERINGNSEC)
4377                                 goto tree_exit;
4378                 }
4379                 if (search.zonecut != NULL) {
4380                     result = setup_delegation(&search, nodep, foundname,
4381                                               rdataset, sigrdataset);
4382                     goto tree_exit;
4383                 } else {
4384                 find_ns:
4385                         result = find_deepest_zonecut(&search, node, nodep,
4386                                                       foundname, rdataset,
4387                                                       sigrdataset);
4388                         goto tree_exit;
4389                 }
4390         } else if (result != ISC_R_SUCCESS)
4391                 goto tree_exit;
4392
4393         /*
4394          * Certain DNSSEC types are not subject to CNAME matching
4395          * (RFC4035, section 2.5 and RFC3007).
4396          *
4397          * We don't check for RRSIG, because we don't store RRSIG records
4398          * directly.
4399          */
4400         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4401                 cname_ok = ISC_FALSE;
4402
4403         /*
4404          * We now go looking for rdata...
4405          */
4406
4407         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4408         locktype = isc_rwlocktype_read;
4409         NODE_LOCK(lock, locktype);
4410
4411         found = NULL;
4412         foundsig = NULL;
4413         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4414         negtype = RBTDB_RDATATYPE_VALUE(0, type);
4415         nsheader = NULL;
4416         nssig = NULL;
4417         cnamesig = NULL;
4418         empty_node = ISC_TRUE;
4419         header_prev = NULL;
4420         for (header = node->data; header != NULL; header = header_next) {
4421                 header_next = header->next;
4422                 if (header->rdh_ttl <= now) {
4423                         /*
4424                          * This rdataset is stale.  If no one else is using the
4425                          * node, we can clean it up right now, otherwise we
4426                          * mark it as stale, and the node as dirty, so it will
4427                          * get cleaned up later.
4428                          */
4429                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4430                             (locktype == isc_rwlocktype_write ||
4431                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4432                                 /*
4433                                  * We update the node's status only when we
4434                                  * can get write access.
4435                                  */
4436                                 locktype = isc_rwlocktype_write;
4437
4438                                 if (dns_rbtnode_refcurrent(node) == 0) {
4439                                         isc_mem_t *mctx;
4440
4441                                         mctx = search.rbtdb->common.mctx;
4442                                         clean_stale_headers(search.rbtdb, mctx,
4443                                                             header);
4444                                         if (header_prev != NULL)
4445                                                 header_prev->next =
4446                                                         header->next;
4447                                         else
4448                                                 node->data = header->next;
4449                                         free_rdataset(search.rbtdb, mctx,
4450                                                       header);
4451                                 } else {
4452                                         header->attributes |=
4453                                                 RDATASET_ATTR_STALE;
4454                                         node->dirty = 1;
4455                                         header_prev = header;
4456                                 }
4457                         } else
4458                                 header_prev = header;
4459                 } else if (EXISTS(header)) {
4460                         /*
4461                          * We now know that there is at least one active
4462                          * non-stale rdataset at this node.
4463                          */
4464                         empty_node = ISC_FALSE;
4465
4466                         /*
4467                          * If we found a type we were looking for, remember
4468                          * it.
4469                          */
4470                         if (header->type == type ||
4471                             (type == dns_rdatatype_any &&
4472                              RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4473                             (cname_ok && header->type ==
4474                              dns_rdatatype_cname)) {
4475                                 /*
4476                                  * We've found the answer.
4477                                  */
4478                                 found = header;
4479                                 if (header->type == dns_rdatatype_cname &&
4480                                     cname_ok &&
4481                                     cnamesig != NULL) {
4482                                         /*
4483                                          * If we've already got the CNAME RRSIG,
4484                                          * use it, otherwise change sigtype
4485                                          * so that we find it.
4486                                          */
4487                                         if (cnamesig != NULL)
4488                                                 foundsig = cnamesig;
4489                                         else
4490                                                 sigtype =
4491                                                     RBTDB_RDATATYPE_SIGCNAME;
4492                                         foundsig = cnamesig;
4493                                 }
4494                         } else if (header->type == sigtype) {
4495                                 /*
4496                                  * We've found the RRSIG rdataset for our
4497                                  * target type.  Remember it.
4498                                  */
4499                                 foundsig = header;
4500                         } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4501                                    header->type == negtype) {
4502                                 /*
4503                                  * We've found a negative cache entry.
4504                                  */
4505                                 found = header;
4506                         } else if (header->type == dns_rdatatype_ns) {
4507                                 /*
4508                                  * Remember a NS rdataset even if we're
4509                                  * not specifically looking for it, because
4510                                  * we might need it later.
4511                                  */
4512                                 nsheader = header;
4513                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4514                                 /*
4515                                  * If we need the NS rdataset, we'll also
4516                                  * need its signature.
4517                                  */
4518                                 nssig = header;
4519                         } else if (cname_ok &&
4520                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
4521                                 /*
4522                                  * If we get a CNAME match, we'll also need
4523                                  * its signature.
4524                                  */
4525                                 cnamesig = header;
4526                         }
4527                         header_prev = header;
4528                 } else
4529                         header_prev = header;
4530         }
4531
4532         if (empty_node) {
4533                 /*
4534                  * We have an exact match for the name, but there are no
4535                  * extant rdatasets.  That means that this node doesn't
4536                  * meaningfully exist, and that we really have a partial match.
4537                  */
4538                 NODE_UNLOCK(lock, locktype);
4539                 goto find_ns;
4540         }
4541
4542         /*
4543          * If we didn't find what we were looking for...
4544          */
4545         if (found == NULL ||
4546             (DNS_TRUST_ADDITIONAL(found->trust) &&
4547              ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4548             (found->trust == dns_trust_glue &&
4549              ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4550             (DNS_TRUST_PENDING(found->trust) &&
4551              ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4552                 /*
4553                  * If there is an NS rdataset at this node, then this is the
4554                  * deepest zone cut.
4555                  */
4556                 if (nsheader != NULL) {
4557                         if (nodep != NULL) {
4558                                 new_reference(search.rbtdb, node);
4559                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4560                                 *nodep = node;
4561                         }
4562                         bind_rdataset(search.rbtdb, node, nsheader, search.now,
4563                                       rdataset);
4564                         if (need_headerupdate(nsheader, search.now))
4565                                 update = nsheader;
4566                         if (nssig != NULL) {
4567                                 bind_rdataset(search.rbtdb, node, nssig,
4568                                               search.now, sigrdataset);
4569                                 if (need_headerupdate(nssig, search.now))
4570                                         updatesig = nssig;
4571                         }
4572                         result = DNS_R_DELEGATION;
4573                         goto node_exit;
4574                 }
4575
4576                 /*
4577                  * Go find the deepest zone cut.
4578                  */
4579                 NODE_UNLOCK(lock, locktype);
4580                 goto find_ns;
4581         }
4582
4583         /*
4584          * We found what we were looking for, or we found a CNAME.
4585          */
4586
4587         if (nodep != NULL) {
4588                 new_reference(search.rbtdb, node);
4589                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4590                 *nodep = node;
4591         }
4592
4593         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
4594                 /*
4595                  * We found a negative cache entry.
4596                  */
4597                 if (NXDOMAIN(found))
4598                         result = DNS_R_NCACHENXDOMAIN;
4599                 else
4600                         result = DNS_R_NCACHENXRRSET;
4601         } else if (type != found->type &&
4602                    type != dns_rdatatype_any &&
4603                    found->type == dns_rdatatype_cname) {
4604                 /*
4605                  * We weren't doing an ANY query and we found a CNAME instead
4606                  * of the type we were looking for, so we need to indicate
4607                  * that result to the caller.
4608                  */
4609                 result = DNS_R_CNAME;
4610         } else {
4611                 /*
4612                  * An ordinary successful query!
4613                  */
4614                 result = ISC_R_SUCCESS;
4615         }
4616
4617         if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
4618             result == DNS_R_NCACHENXRRSET) {
4619                 bind_rdataset(search.rbtdb, node, found, search.now,
4620                               rdataset);
4621                 if (need_headerupdate(found, search.now))
4622                         update = found;
4623                 if (foundsig != NULL) {
4624                         bind_rdataset(search.rbtdb, node, foundsig, search.now,
4625                                       sigrdataset);
4626                         if (need_headerupdate(foundsig, search.now))
4627                                 updatesig = foundsig;
4628                 }
4629         }
4630
4631  node_exit:
4632         if ((update != NULL || updatesig != NULL) &&
4633             locktype != isc_rwlocktype_write) {
4634                 NODE_UNLOCK(lock, locktype);
4635                 NODE_LOCK(lock, isc_rwlocktype_write);
4636                 locktype = isc_rwlocktype_write;
4637         }
4638         if (update != NULL && need_headerupdate(update, search.now))
4639                 update_header(search.rbtdb, update, search.now);
4640         if (updatesig != NULL && need_headerupdate(updatesig, search.now))
4641                 update_header(search.rbtdb, updatesig, search.now);
4642
4643         NODE_UNLOCK(lock, locktype);
4644
4645  tree_exit:
4646         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4647
4648         /*
4649          * If we found a zonecut but aren't going to use it, we have to
4650          * let go of it.
4651          */
4652         if (search.need_cleanup) {
4653                 node = search.zonecut;
4654                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4655
4656                 NODE_LOCK(lock, isc_rwlocktype_read);
4657                 decrement_reference(search.rbtdb, node, 0,
4658                                     isc_rwlocktype_read, isc_rwlocktype_none,
4659                                     ISC_FALSE);
4660                 NODE_UNLOCK(lock, isc_rwlocktype_read);
4661         }
4662
4663         dns_rbtnodechain_reset(&search.chain);
4664
4665         return (result);
4666 }
4667
4668 static isc_result_t
4669 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4670                   isc_stdtime_t now, dns_dbnode_t **nodep,
4671                   dns_name_t *foundname,
4672                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4673 {
4674         dns_rbtnode_t *node = NULL;
4675         nodelock_t *lock;
4676         isc_result_t result;
4677         rbtdb_search_t search;
4678         rdatasetheader_t *header, *header_prev, *header_next;
4679         rdatasetheader_t *found, *foundsig;
4680         unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
4681         isc_rwlocktype_t locktype;
4682
4683         search.rbtdb = (dns_rbtdb_t *)db;
4684
4685         REQUIRE(VALID_RBTDB(search.rbtdb));
4686
4687         if (now == 0)
4688                 isc_stdtime_get(&now);
4689
4690         search.rbtversion = NULL;
4691         search.serial = 1;
4692         search.options = options;
4693         search.copy_name = ISC_FALSE;
4694         search.need_cleanup = ISC_FALSE;
4695         search.wild = ISC_FALSE;
4696         search.zonecut = NULL;
4697         dns_fixedname_init(&search.zonecut_name);
4698         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4699         search.now = now;
4700
4701         if ((options & DNS_DBFIND_NOEXACT) != 0)
4702                 rbtoptions |= DNS_RBTFIND_NOEXACT;
4703
4704         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4705
4706         /*
4707          * Search down from the root of the tree.
4708          */
4709         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4710                                   &search.chain, rbtoptions, NULL, &search);
4711
4712         if (result == DNS_R_PARTIALMATCH) {
4713         find_ns:
4714                 result = find_deepest_zonecut(&search, node, nodep, foundname,
4715                                               rdataset, sigrdataset);
4716                 goto tree_exit;
4717         } else if (result != ISC_R_SUCCESS)
4718                 goto tree_exit;
4719
4720         /*
4721          * We now go looking for an NS rdataset at the node.
4722          */
4723
4724         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4725         locktype = isc_rwlocktype_read;
4726         NODE_LOCK(lock, locktype);
4727
4728         found = NULL;
4729         foundsig = NULL;
4730         header_prev = NULL;
4731         for (header = node->data; header != NULL; header = header_next) {
4732                 header_next = header->next;
4733                 if (header->rdh_ttl <= now) {
4734                         /*
4735                          * This rdataset is stale.  If no one else is using the
4736                          * node, we can clean it up right now, otherwise we
4737                          * mark it as stale, and the node as dirty, so it will
4738                          * get cleaned up later.
4739                          */
4740                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4741                             (locktype == isc_rwlocktype_write ||
4742                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4743                                 /*
4744                                  * We update the node's status only when we
4745                                  * can get write access.
4746                                  */
4747                                 locktype = isc_rwlocktype_write;
4748
4749                                 if (dns_rbtnode_refcurrent(node) == 0) {
4750                                         isc_mem_t *mctx;
4751
4752                                         mctx = search.rbtdb->common.mctx;
4753                                         clean_stale_headers(search.rbtdb, mctx,
4754                                                             header);
4755                                         if (header_prev != NULL)
4756                                                 header_prev->next =
4757                                                         header->next;
4758                                         else
4759                                                 node->data = header->next;
4760                                         free_rdataset(search.rbtdb, mctx,
4761                                                       header);
4762                                 } else {
4763                                         header->attributes |=
4764                                                 RDATASET_ATTR_STALE;
4765                                         node->dirty = 1;
4766                                         header_prev = header;
4767                                 }
4768                         } else
4769                                 header_prev = header;
4770                 } else if (EXISTS(header)) {
4771                         /*
4772                          * If we found a type we were looking for, remember
4773                          * it.
4774                          */
4775                         if (header->type == dns_rdatatype_ns) {
4776                                 /*
4777                                  * Remember a NS rdataset even if we're
4778                                  * not specifically looking for it, because
4779                                  * we might need it later.
4780                                  */
4781                                 found = header;
4782                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4783                                 /*
4784                                  * If we need the NS rdataset, we'll also
4785                                  * need its signature.
4786                                  */
4787                                 foundsig = header;
4788                         }
4789                         header_prev = header;
4790                 } else
4791                         header_prev = header;
4792         }
4793
4794         if (found == NULL) {
4795                 /*
4796                  * No NS records here.
4797                  */
4798                 NODE_UNLOCK(lock, locktype);
4799                 goto find_ns;
4800         }
4801
4802         if (nodep != NULL) {
4803                 new_reference(search.rbtdb, node);
4804                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4805                 *nodep = node;
4806         }
4807
4808         bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
4809         if (foundsig != NULL)
4810                 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4811                               sigrdataset);
4812
4813         if (need_headerupdate(found, search.now) ||
4814             (foundsig != NULL &&  need_headerupdate(foundsig, search.now))) {
4815                 if (locktype != isc_rwlocktype_write) {
4816                         NODE_UNLOCK(lock, locktype);
4817                         NODE_LOCK(lock, isc_rwlocktype_write);
4818                         locktype = isc_rwlocktype_write;
4819                 }
4820                 if (need_headerupdate(found, search.now))
4821                         update_header(search.rbtdb, found, search.now);
4822                 if (foundsig != NULL &&
4823                     need_headerupdate(foundsig, search.now)) {
4824                         update_header(search.rbtdb, foundsig, search.now);
4825                 }
4826         }
4827
4828         NODE_UNLOCK(lock, locktype);
4829
4830  tree_exit:
4831         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4832
4833         INSIST(!search.need_cleanup);
4834
4835         dns_rbtnodechain_reset(&search.chain);
4836
4837         if (result == DNS_R_DELEGATION)
4838                 result = ISC_R_SUCCESS;
4839
4840         return (result);
4841 }
4842
4843 static void
4844 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
4845         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4846         dns_rbtnode_t *node = (dns_rbtnode_t *)source;
4847         unsigned int refs;
4848
4849         REQUIRE(VALID_RBTDB(rbtdb));
4850         REQUIRE(targetp != NULL && *targetp == NULL);
4851
4852         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
4853         dns_rbtnode_refincrement(node, &refs);
4854         INSIST(refs != 0);
4855         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
4856
4857         *targetp = source;
4858 }
4859
4860 static void
4861 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
4862         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4863         dns_rbtnode_t *node;
4864         isc_boolean_t want_free = ISC_FALSE;
4865         isc_boolean_t inactive = ISC_FALSE;
4866         rbtdb_nodelock_t *nodelock;
4867
4868         REQUIRE(VALID_RBTDB(rbtdb));
4869         REQUIRE(targetp != NULL && *targetp != NULL);
4870
4871         node = (dns_rbtnode_t *)(*targetp);
4872         nodelock = &rbtdb->node_locks[node->locknum];
4873
4874         NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
4875
4876         if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
4877                                 isc_rwlocktype_none, ISC_FALSE)) {
4878                 if (isc_refcount_current(&nodelock->references) == 0 &&
4879                     nodelock->exiting) {
4880                         inactive = ISC_TRUE;
4881                 }
4882         }
4883
4884         NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
4885
4886         *targetp = NULL;
4887
4888         if (inactive) {
4889                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
4890                 rbtdb->active--;
4891                 if (rbtdb->active == 0)
4892                         want_free = ISC_TRUE;
4893                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
4894                 if (want_free) {
4895                         char buf[DNS_NAME_FORMATSIZE];
4896                         if (dns_name_dynamic(&rbtdb->common.origin))
4897                                 dns_name_format(&rbtdb->common.origin, buf,
4898                                                 sizeof(buf));
4899                         else
4900                                 strcpy(buf, "<UNKNOWN>");
4901                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
4902                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
4903                                       "calling free_rbtdb(%s)", buf);
4904                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
4905                 }
4906         }
4907 }
4908
4909 static isc_result_t
4910 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
4911         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4912         dns_rbtnode_t *rbtnode = node;
4913         rdatasetheader_t *header;
4914         isc_boolean_t force_expire = ISC_FALSE;
4915         /*
4916          * These are the category and module used by the cache cleaner.
4917          */
4918         isc_boolean_t log = ISC_FALSE;
4919         isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
4920         isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
4921         int level = ISC_LOG_DEBUG(2);
4922         char printname[DNS_NAME_FORMATSIZE];
4923
4924         REQUIRE(VALID_RBTDB(rbtdb));
4925
4926         /*
4927          * Caller must hold a tree lock.
4928          */
4929
4930         if (now == 0)
4931                 isc_stdtime_get(&now);
4932
4933         if (isc_mem_isovermem(rbtdb->common.mctx)) {
4934                 isc_uint32_t val;
4935
4936                 isc_random_get(&val);
4937                 /*
4938                  * XXXDCL Could stand to have a better policy, like LRU.
4939                  */
4940                 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
4941
4942                 /*
4943                  * Note that 'log' can be true IFF overmem is also true.
4944                  * overmem can currently only be true for cache
4945                  * databases -- hence all of the "overmem cache" log strings.
4946                  */
4947                 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
4948                 if (log)
4949                         isc_log_write(dns_lctx, category, module, level,
4950                                       "overmem cache: %s %s",
4951                                       force_expire ? "FORCE" : "check",
4952                                       dns_rbt_formatnodename(rbtnode,
4953                                                            printname,
4954                                                            sizeof(printname)));
4955         }
4956
4957         /*
4958          * We may not need write access, but this code path is not performance
4959          * sensitive, so it should be okay to always lock as a writer.
4960          */
4961         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4962                   isc_rwlocktype_write);
4963
4964         for (header = rbtnode->data; header != NULL; header = header->next)
4965                 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
4966                         /*
4967                          * We don't check if refcurrent(rbtnode) == 0 and try
4968                          * to free like we do in cache_find(), because
4969                          * refcurrent(rbtnode) must be non-zero.  This is so
4970                          * because 'node' is an argument to the function.
4971                          */
4972                         header->attributes |= RDATASET_ATTR_STALE;
4973                         rbtnode->dirty = 1;
4974                         if (log)
4975                                 isc_log_write(dns_lctx, category, module,
4976                                               level, "overmem cache: stale %s",
4977                                               printname);
4978                 } else if (force_expire) {
4979                         if (! RETAIN(header)) {
4980                                 set_ttl(rbtdb, header, 0);
4981                                 header->attributes |= RDATASET_ATTR_STALE;
4982                                 rbtnode->dirty = 1;
4983                         } else if (log) {
4984                                 isc_log_write(dns_lctx, category, module,
4985                                               level, "overmem cache: "
4986                                               "reprieve by RETAIN() %s",
4987                                               printname);
4988                         }
4989                 } else if (isc_mem_isovermem(rbtdb->common.mctx) && log)
4990                         isc_log_write(dns_lctx, category, module, level,
4991                                       "overmem cache: saved %s", printname);
4992
4993         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4994                     isc_rwlocktype_write);
4995
4996         return (ISC_R_SUCCESS);
4997 }
4998
4999 static void
5000 overmem(dns_db_t *db, isc_boolean_t overmem) {
5001         /* This is an empty callback.  See adb.c:water() */
5002
5003         UNUSED(db);
5004         UNUSED(overmem);
5005
5006         return;
5007 }
5008
5009 static void
5010 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5011         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5012         dns_rbtnode_t *rbtnode = node;
5013         isc_boolean_t first;
5014
5015         REQUIRE(VALID_RBTDB(rbtdb));
5016
5017         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5018                   isc_rwlocktype_read);
5019
5020         fprintf(out, "node %p, %u references, locknum = %u\n",
5021                 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5022                 rbtnode->locknum);
5023         if (rbtnode->data != NULL) {
5024                 rdatasetheader_t *current, *top_next;
5025
5026                 for (current = rbtnode->data; current != NULL;
5027                      current = top_next) {
5028                         top_next = current->next;
5029                         first = ISC_TRUE;
5030                         fprintf(out, "\ttype %u", current->type);
5031                         do {
5032                                 if (!first)
5033                                         fprintf(out, "\t");
5034                                 first = ISC_FALSE;
5035                                 fprintf(out,
5036                                         "\tserial = %lu, ttl = %u, "
5037                                         "trust = %u, attributes = %u, "
5038                                         "resign = %u\n",
5039                                         (unsigned long)current->serial,
5040                                         current->rdh_ttl,
5041                                         current->trust,
5042                                         current->attributes,
5043                                         current->resign);
5044                                 current = current->down;
5045                         } while (current != NULL);
5046                 }
5047         } else
5048                 fprintf(out, "(empty)\n");
5049
5050         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5051                     isc_rwlocktype_read);
5052 }
5053
5054 static isc_result_t
5055 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5056 {
5057         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5058         rbtdb_dbiterator_t *rbtdbiter;
5059
5060         REQUIRE(VALID_RBTDB(rbtdb));
5061
5062         rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5063         if (rbtdbiter == NULL)
5064                 return (ISC_R_NOMEMORY);
5065
5066         rbtdbiter->common.methods = &dbiterator_methods;
5067         rbtdbiter->common.db = NULL;
5068         dns_db_attach(db, &rbtdbiter->common.db);
5069         rbtdbiter->common.relative_names =
5070                         ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5071         rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5072         rbtdbiter->common.cleaning = ISC_FALSE;
5073         rbtdbiter->paused = ISC_TRUE;
5074         rbtdbiter->tree_locked = isc_rwlocktype_none;
5075         rbtdbiter->result = ISC_R_SUCCESS;
5076         dns_fixedname_init(&rbtdbiter->name);
5077         dns_fixedname_init(&rbtdbiter->origin);
5078         rbtdbiter->node = NULL;
5079         rbtdbiter->delete = 0;
5080         rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5081         rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5082         memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5083         dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5084         dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5085         if (rbtdbiter->nsec3only)
5086                 rbtdbiter->current = &rbtdbiter->nsec3chain;
5087         else
5088                 rbtdbiter->current = &rbtdbiter->chain;
5089
5090         *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5091
5092         return (ISC_R_SUCCESS);
5093 }
5094
5095 static isc_result_t
5096 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5097                   dns_rdatatype_t type, dns_rdatatype_t covers,
5098                   isc_stdtime_t now, dns_rdataset_t *rdataset,
5099                   dns_rdataset_t *sigrdataset)
5100 {
5101         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5102         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5103         rdatasetheader_t *header, *header_next, *found, *foundsig;
5104         rbtdb_serial_t serial;
5105         rbtdb_version_t *rbtversion = version;
5106         isc_boolean_t close_version = ISC_FALSE;
5107         rbtdb_rdatatype_t matchtype, sigmatchtype;
5108
5109         REQUIRE(VALID_RBTDB(rbtdb));
5110         REQUIRE(type != dns_rdatatype_any);
5111
5112         if (rbtversion == NULL) {
5113                 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5114                 close_version = ISC_TRUE;
5115         }
5116         serial = rbtversion->serial;
5117         now = 0;
5118
5119         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5120                   isc_rwlocktype_read);
5121
5122         found = NULL;
5123         foundsig = NULL;
5124         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5125         if (covers == 0)
5126                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5127         else
5128                 sigmatchtype = 0;
5129
5130         for (header = rbtnode->data; header != NULL; header = header_next) {
5131                 header_next = header->next;
5132                 do {
5133                         if (header->serial <= serial &&
5134                             !IGNORE(header)) {
5135                                 /*
5136                                  * Is this a "this rdataset doesn't
5137                                  * exist" record?
5138                                  */
5139                                 if (NONEXISTENT(header))
5140                                         header = NULL;
5141                                 break;
5142                         } else
5143                                 header = header->down;
5144                 } while (header != NULL);
5145                 if (header != NULL) {
5146                         /*
5147                          * We have an active, extant rdataset.  If it's a
5148                          * type we're looking for, remember it.
5149                          */
5150                         if (header->type == matchtype) {
5151                                 found = header;
5152                                 if (foundsig != NULL)
5153                                         break;
5154                         } else if (header->type == sigmatchtype) {
5155                                 foundsig = header;
5156                                 if (found != NULL)
5157                                         break;
5158                         }
5159                 }
5160         }
5161         if (found != NULL) {
5162                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5163                 if (foundsig != NULL)
5164                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5165                                       sigrdataset);
5166         }
5167
5168         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5169                     isc_rwlocktype_read);
5170
5171         if (close_version)
5172                 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5173                              ISC_FALSE);
5174
5175         if (found == NULL)
5176                 return (ISC_R_NOTFOUND);
5177
5178         return (ISC_R_SUCCESS);
5179 }
5180
5181 static isc_result_t
5182 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5183                    dns_rdatatype_t type, dns_rdatatype_t covers,
5184                    isc_stdtime_t now, dns_rdataset_t *rdataset,
5185                    dns_rdataset_t *sigrdataset)
5186 {
5187         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5188         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5189         rdatasetheader_t *header, *header_next, *found, *foundsig;
5190         rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5191         isc_result_t result;
5192         nodelock_t *lock;
5193         isc_rwlocktype_t locktype;
5194
5195         REQUIRE(VALID_RBTDB(rbtdb));
5196         REQUIRE(type != dns_rdatatype_any);
5197
5198         UNUSED(version);
5199
5200         result = ISC_R_SUCCESS;
5201
5202         if (now == 0)
5203                 isc_stdtime_get(&now);
5204
5205         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5206         locktype = isc_rwlocktype_read;
5207         NODE_LOCK(lock, locktype);
5208
5209         found = NULL;
5210         foundsig = NULL;
5211         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5212         negtype = RBTDB_RDATATYPE_VALUE(0, type);
5213         if (covers == 0)
5214                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5215         else
5216                 sigmatchtype = 0;
5217
5218         for (header = rbtnode->data; header != NULL; header = header_next) {
5219                 header_next = header->next;
5220                 if (header->rdh_ttl <= now) {
5221                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5222                             (locktype == isc_rwlocktype_write ||
5223                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5224                                 /*
5225                                  * We update the node's status only when we
5226                                  * can get write access.
5227                                  */
5228                                 locktype = isc_rwlocktype_write;
5229
5230                                 /*
5231                                  * We don't check if refcurrent(rbtnode) == 0
5232                                  * and try to free like we do in cache_find(),
5233                                  * because refcurrent(rbtnode) must be
5234                                  * non-zero.  This is so because 'node' is an
5235                                  * argument to the function.
5236                                  */
5237                                 header->attributes |= RDATASET_ATTR_STALE;
5238                                 rbtnode->dirty = 1;
5239                         }
5240                 } else if (EXISTS(header)) {
5241                         if (header->type == matchtype)
5242                                 found = header;
5243                         else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5244                                  header->type == negtype)
5245                                 found = header;
5246                         else if (header->type == sigmatchtype)
5247                                 foundsig = header;
5248                 }
5249         }
5250         if (found != NULL) {
5251                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5252                 if (foundsig != NULL)
5253                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5254                                       sigrdataset);
5255         }
5256
5257         NODE_UNLOCK(lock, locktype);
5258
5259         if (found == NULL)
5260                 return (ISC_R_NOTFOUND);
5261
5262         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
5263                 /*
5264                  * We found a negative cache entry.
5265                  */
5266                 if (NXDOMAIN(found))
5267                         result = DNS_R_NCACHENXDOMAIN;
5268                 else
5269                         result = DNS_R_NCACHENXRRSET;
5270         }
5271
5272         return (result);
5273 }
5274
5275 static isc_result_t
5276 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5277              isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5278 {
5279         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5280         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5281         rbtdb_version_t *rbtversion = version;
5282         rbtdb_rdatasetiter_t *iterator;
5283         unsigned int refs;
5284
5285         REQUIRE(VALID_RBTDB(rbtdb));
5286
5287         iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5288         if (iterator == NULL)
5289                 return (ISC_R_NOMEMORY);
5290
5291         if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5292                 now = 0;
5293                 if (rbtversion == NULL)
5294                         currentversion(db,
5295                                  (dns_dbversion_t **) (void *)(&rbtversion));
5296                 else {
5297                         unsigned int refs;
5298
5299                         isc_refcount_increment(&rbtversion->references,
5300                                                &refs);
5301                         INSIST(refs > 1);
5302                 }
5303         } else {
5304                 if (now == 0)
5305                         isc_stdtime_get(&now);
5306                 rbtversion = NULL;
5307         }
5308
5309         iterator->common.magic = DNS_RDATASETITER_MAGIC;
5310         iterator->common.methods = &rdatasetiter_methods;
5311         iterator->common.db = db;
5312         iterator->common.node = node;
5313         iterator->common.version = (dns_dbversion_t *)rbtversion;
5314         iterator->common.now = now;
5315
5316         NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5317
5318         dns_rbtnode_refincrement(rbtnode, &refs);
5319         INSIST(refs != 0);
5320
5321         iterator->current = NULL;
5322
5323         NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5324
5325         *iteratorp = (dns_rdatasetiter_t *)iterator;
5326
5327         return (ISC_R_SUCCESS);
5328 }
5329
5330 static isc_boolean_t
5331 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5332         rdatasetheader_t *header, *header_next;
5333         isc_boolean_t cname, other_data;
5334         dns_rdatatype_t rdtype;
5335
5336         /*
5337          * The caller must hold the node lock.
5338          */
5339
5340         /*
5341          * Look for CNAME and "other data" rdatasets active in our version.
5342          */
5343         cname = ISC_FALSE;
5344         other_data = ISC_FALSE;
5345         for (header = node->data; header != NULL; header = header_next) {
5346                 header_next = header->next;
5347                 if (header->type == dns_rdatatype_cname) {
5348                         /*
5349                          * Look for an active extant CNAME.
5350                          */
5351                         do {
5352                                 if (header->serial <= serial &&
5353                                     !IGNORE(header)) {
5354                                         /*
5355                                          * Is this a "this rdataset doesn't
5356                                          * exist" record?
5357                                          */
5358                                         if (NONEXISTENT(header))
5359                                                 header = NULL;
5360                                         break;
5361                                 } else
5362                                         header = header->down;
5363                         } while (header != NULL);
5364                         if (header != NULL)
5365                                 cname = ISC_TRUE;
5366                 } else {
5367                         /*
5368                          * Look for active extant "other data".
5369                          *
5370                          * "Other data" is any rdataset whose type is not
5371                          * KEY, NSEC, SIG or RRSIG.
5372                          */
5373                         rdtype = RBTDB_RDATATYPE_BASE(header->type);
5374                         if (rdtype != dns_rdatatype_key &&
5375                             rdtype != dns_rdatatype_sig &&
5376                             rdtype != dns_rdatatype_nsec &&
5377                             rdtype != dns_rdatatype_rrsig) {
5378                                 /*
5379                                  * Is it active and extant?
5380                                  */
5381                                 do {
5382                                         if (header->serial <= serial &&
5383                                             !IGNORE(header)) {
5384                                                 /*
5385                                                  * Is this a "this rdataset
5386                                                  * doesn't exist" record?
5387                                                  */
5388                                                 if (NONEXISTENT(header))
5389                                                         header = NULL;
5390                                                 break;
5391                                         } else
5392                                                 header = header->down;
5393                                 } while (header != NULL);
5394                                 if (header != NULL)
5395                                         other_data = ISC_TRUE;
5396                         }
5397                 }
5398         }
5399
5400         if (cname && other_data)
5401                 return (ISC_TRUE);
5402
5403         return (ISC_FALSE);
5404 }
5405
5406 static isc_result_t
5407 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5408         isc_result_t result;
5409
5410         INSIST(!IS_CACHE(rbtdb));
5411         INSIST(newheader->heap_index == 0);
5412         INSIST(!ISC_LINK_LINKED(newheader, link));
5413
5414         result = isc_heap_insert(rbtdb->heaps[idx], newheader);
5415         return (result);
5416 }
5417
5418 static isc_result_t
5419 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5420     rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5421     dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5422 {
5423         rbtdb_changed_t *changed = NULL;
5424         rdatasetheader_t *topheader, *topheader_prev, *header, *sigheader;
5425         unsigned char *merged;
5426         isc_result_t result;
5427         isc_boolean_t header_nx;
5428         isc_boolean_t newheader_nx;
5429         isc_boolean_t merge;
5430         dns_rdatatype_t rdtype, covers;
5431         rbtdb_rdatatype_t negtype, sigtype;
5432         dns_trust_t trust;
5433         int idx;
5434
5435         /*
5436          * Add an rdatasetheader_t to a node.
5437          */
5438
5439         /*
5440          * Caller must be holding the node lock.
5441          */
5442
5443         if ((options & DNS_DBADD_MERGE) != 0) {
5444                 REQUIRE(rbtversion != NULL);
5445                 merge = ISC_TRUE;
5446         } else
5447                 merge = ISC_FALSE;
5448
5449         if ((options & DNS_DBADD_FORCE) != 0)
5450                 trust = dns_trust_ultimate;
5451         else
5452                 trust = newheader->trust;
5453
5454         if (rbtversion != NULL && !loading) {
5455                 /*
5456                  * We always add a changed record, even if no changes end up
5457                  * being made to this node, because it's harmless and
5458                  * simplifies the code.
5459                  */
5460                 changed = add_changed(rbtdb, rbtversion, rbtnode);
5461                 if (changed == NULL) {
5462                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5463                         return (ISC_R_NOMEMORY);
5464                 }
5465         }
5466
5467         newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5468         topheader_prev = NULL;
5469         sigheader = NULL;
5470         negtype = 0;
5471         if (rbtversion == NULL && !newheader_nx) {
5472                 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5473                 if (rdtype == 0) {
5474                         /*
5475                          * We're adding a negative cache entry.
5476                          */
5477                         covers = RBTDB_RDATATYPE_EXT(newheader->type);
5478                         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
5479                                                         covers);
5480                         for (topheader = rbtnode->data;
5481                              topheader != NULL;
5482                              topheader = topheader->next) {
5483                                 /*
5484                                  * If we're adding an negative cache entry
5485                                  * which covers all types (NXDOMAIN,
5486                                  * NODATA(QTYPE=ANY)).
5487                                  *
5488                                  * We make all other data stale so that the
5489                                  * only rdataset that can be found at this
5490                                  * node is the negative cache entry.
5491                                  *
5492                                  * Otherwise look for any RRSIGs of the
5493                                  * given type so they can be marked stale
5494                                  * later.
5495                                  */
5496                                 if (covers == dns_rdatatype_any) {
5497                                         set_ttl(rbtdb, topheader, 0);
5498                                         topheader->attributes |=
5499                                                 RDATASET_ATTR_STALE;
5500                                         rbtnode->dirty = 1;
5501                                 } else if (topheader->type == sigtype)
5502                                         sigheader = topheader;
5503                         }
5504                         if (covers == dns_rdatatype_any)
5505                                 goto find_header;
5506                         negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5507                 } else {
5508                         /*
5509                          * We're adding something that isn't a
5510                          * negative cache entry.  Look for an extant
5511                          * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5512                          * cache entry.
5513                          */
5514                         for (topheader = rbtnode->data;
5515                              topheader != NULL;
5516                              topheader = topheader->next) {
5517                                 if (topheader->type ==
5518                                     RBTDB_RDATATYPE_NCACHEANY)
5519                                         break;
5520                         }
5521                         if (topheader != NULL && EXISTS(topheader) &&
5522                             topheader->rdh_ttl > now) {
5523                                 /*
5524                                  * Found one.
5525                                  */
5526                                 if (trust < topheader->trust) {
5527                                         /*
5528                                          * The NXDOMAIN/NODATA(QTYPE=ANY)
5529                                          * is more trusted.
5530                                          */
5531                                         free_rdataset(rbtdb,
5532                                                       rbtdb->common.mctx,
5533                                                       newheader);
5534                                         if (addedrdataset != NULL)
5535                                                 bind_rdataset(rbtdb, rbtnode,
5536                                                               topheader, now,
5537                                                               addedrdataset);
5538                                         return (DNS_R_UNCHANGED);
5539                                 }
5540                                 /*
5541                                  * The new rdataset is better.  Expire the
5542                                  * NXDOMAIN/NODATA(QTYPE=ANY).
5543                                  */
5544                                 set_ttl(rbtdb, topheader, 0);
5545                                 topheader->attributes |= RDATASET_ATTR_STALE;
5546                                 rbtnode->dirty = 1;
5547                                 topheader = NULL;
5548                                 goto find_header;
5549                         }
5550                         negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5551                 }
5552         }
5553
5554         for (topheader = rbtnode->data;
5555              topheader != NULL;
5556              topheader = topheader->next) {
5557                 if (topheader->type == newheader->type ||
5558                     topheader->type == negtype)
5559                         break;
5560                 topheader_prev = topheader;
5561         }
5562
5563  find_header:
5564         /*
5565          * If header isn't NULL, we've found the right type.  There may be
5566          * IGNORE rdatasets between the top of the chain and the first real
5567          * data.  We skip over them.
5568          */
5569         header = topheader;
5570         while (header != NULL && IGNORE(header))
5571                 header = header->down;
5572         if (header != NULL) {
5573                 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
5574
5575                 /*
5576                  * Deleting an already non-existent rdataset has no effect.
5577                  */
5578                 if (header_nx && newheader_nx) {
5579                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5580                         return (DNS_R_UNCHANGED);
5581                 }
5582
5583                 /*
5584                  * Trying to add an rdataset with lower trust to a cache DB
5585                  * has no effect, provided that the cache data isn't stale.
5586                  */
5587                 if (rbtversion == NULL && trust < header->trust &&
5588                     (header->rdh_ttl > now || header_nx)) {
5589                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5590                         if (addedrdataset != NULL)
5591                                 bind_rdataset(rbtdb, rbtnode, header, now,
5592                                               addedrdataset);
5593                         return (DNS_R_UNCHANGED);
5594                 }
5595
5596                 /*
5597                  * Don't merge if a nonexistent rdataset is involved.
5598                  */
5599                 if (merge && (header_nx || newheader_nx))
5600                         merge = ISC_FALSE;
5601
5602                 /*
5603                  * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
5604                  * that is the union of 'newheader' and 'header'.
5605                  */
5606                 if (merge) {
5607                         unsigned int flags = 0;
5608                         INSIST(rbtversion->serial >= header->serial);
5609                         merged = NULL;
5610                         result = ISC_R_SUCCESS;
5611
5612                         if ((options & DNS_DBADD_EXACT) != 0)
5613                                 flags |= DNS_RDATASLAB_EXACT;
5614                         if ((options & DNS_DBADD_EXACTTTL) != 0 &&
5615                              newheader->rdh_ttl != header->rdh_ttl)
5616                                         result = DNS_R_NOTEXACT;
5617                         else if (newheader->rdh_ttl != header->rdh_ttl)
5618                                 flags |= DNS_RDATASLAB_FORCE;
5619                         if (result == ISC_R_SUCCESS)
5620                                 result = dns_rdataslab_merge(
5621                                              (unsigned char *)header,
5622                                              (unsigned char *)newheader,
5623                                              (unsigned int)(sizeof(*newheader)),
5624                                              rbtdb->common.mctx,
5625                                              rbtdb->common.rdclass,
5626                                              (dns_rdatatype_t)header->type,
5627                                              flags, &merged);
5628                         if (result == ISC_R_SUCCESS) {
5629                                 /*
5630                                  * If 'header' has the same serial number as
5631                                  * we do, we could clean it up now if we knew
5632                                  * that our caller had no references to it.
5633                                  * We don't know this, however, so we leave it
5634                                  * alone.  It will get cleaned up when
5635                                  * clean_zone_node() runs.
5636                                  */
5637                                 free_rdataset(rbtdb, rbtdb->common.mctx,
5638                                               newheader);
5639                                 newheader = (rdatasetheader_t *)merged;
5640                                 if (loading && RESIGN(newheader) &&
5641                                     RESIGN(header) &&
5642                                     header->resign < newheader->resign)
5643                                         newheader->resign = header->resign;
5644                         } else {
5645                                 free_rdataset(rbtdb, rbtdb->common.mctx,
5646                                               newheader);
5647                                 return (result);
5648                         }
5649                 }
5650                 /*
5651                  * Don't replace existing NS, A and AAAA RRsets
5652                  * in the cache if they already exist.  This
5653                  * prevents named being locked to old servers.
5654                  * Don't lower trust of existing record if the
5655                  * update is forced.
5656                  */
5657                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5658                     header->type == dns_rdatatype_ns &&
5659                     !header_nx && !newheader_nx &&
5660                     header->trust >= newheader->trust &&
5661                     dns_rdataslab_equalx((unsigned char *)header,
5662                                          (unsigned char *)newheader,
5663                                          (unsigned int)(sizeof(*newheader)),
5664                                          rbtdb->common.rdclass,
5665                                          (dns_rdatatype_t)header->type)) {
5666                         /*
5667                          * Honour the new ttl if it is less than the
5668                          * older one.
5669                          */
5670                         if (header->rdh_ttl > newheader->rdh_ttl)
5671                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
5672                         if (header->noqname == NULL &&
5673                             newheader->noqname != NULL) {
5674                                 header->noqname = newheader->noqname;
5675                                 newheader->noqname = NULL;
5676                         }
5677                         if (header->closest == NULL &&
5678                             newheader->closest != NULL) {
5679                                 header->closest = newheader->closest;
5680                                 newheader->closest = NULL;
5681                         }
5682                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5683                         if (addedrdataset != NULL)
5684                                 bind_rdataset(rbtdb, rbtnode, header, now,
5685                                               addedrdataset);
5686                         return (ISC_R_SUCCESS);
5687                 }
5688                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5689                     (header->type == dns_rdatatype_a ||
5690                      header->type == dns_rdatatype_aaaa) &&
5691                     !header_nx && !newheader_nx &&
5692                     header->trust >= newheader->trust &&
5693                     dns_rdataslab_equal((unsigned char *)header,
5694                                         (unsigned char *)newheader,
5695                                         (unsigned int)(sizeof(*newheader)))) {
5696                         /*
5697                          * Honour the new ttl if it is less than the
5698                          * older one.
5699                          */
5700                         if (header->rdh_ttl > newheader->rdh_ttl)
5701                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
5702                         if (header->noqname == NULL &&
5703                             newheader->noqname != NULL) {
5704                                 header->noqname = newheader->noqname;
5705                                 newheader->noqname = NULL;
5706                         }
5707                         if (header->closest == NULL &&
5708                             newheader->closest != NULL) {
5709                                 header->closest = newheader->closest;
5710                                 newheader->closest = NULL;
5711                         }
5712                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5713                         if (addedrdataset != NULL)
5714                                 bind_rdataset(rbtdb, rbtnode, header, now,
5715                                               addedrdataset);
5716                         return (ISC_R_SUCCESS);
5717                 }
5718                 INSIST(rbtversion == NULL ||
5719                        rbtversion->serial >= topheader->serial);
5720                 if (topheader_prev != NULL)
5721                         topheader_prev->next = newheader;
5722                 else
5723                         rbtnode->data = newheader;
5724                 newheader->next = topheader->next;
5725                 if (loading) {
5726                         /*
5727                          * There are no other references to 'header' when
5728                          * loading, so we MAY clean up 'header' now.
5729                          * Since we don't generate changed records when
5730                          * loading, we MUST clean up 'header' now.
5731                          */
5732                         newheader->down = NULL;
5733                         free_rdataset(rbtdb, rbtdb->common.mctx, header);
5734                 } else {
5735                         newheader->down = topheader;
5736                         topheader->next = newheader;
5737                         rbtnode->dirty = 1;
5738                         if (changed != NULL)
5739                                 changed->dirty = ISC_TRUE;
5740                         if (rbtversion == NULL) {
5741                                 set_ttl(rbtdb, header, 0);
5742                                 header->attributes |= RDATASET_ATTR_STALE;
5743                                 if (sigheader != NULL) {
5744                                         set_ttl(rbtdb, sigheader, 0);
5745                                         sigheader->attributes |=
5746                                                  RDATASET_ATTR_STALE;
5747                                 }
5748                         }
5749                         idx = newheader->node->locknum;
5750                         if (IS_CACHE(rbtdb)) {
5751                                 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5752                                                  newheader, link);
5753                                 /*
5754                                  * XXXMLG We don't check the return value
5755                                  * here.  If it fails, we will not do TTL
5756                                  * based expiry on this node.  However, we
5757                                  * will do it on the LRU side, so memory
5758                                  * will not leak... for long.
5759                                  */
5760                                 isc_heap_insert(rbtdb->heaps[idx], newheader);
5761                         } else if (RESIGN(newheader))
5762                                 resign_insert(rbtdb, idx, newheader);
5763                 }
5764         } else {
5765                 /*
5766                  * No non-IGNORED rdatasets of the given type exist at
5767                  * this node.
5768                  */
5769
5770                 /*
5771                  * If we're trying to delete the type, don't bother.
5772                  */
5773                 if (newheader_nx) {
5774                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5775                         return (DNS_R_UNCHANGED);
5776                 }
5777
5778                 if (topheader != NULL) {
5779                         /*
5780                          * We have a list of rdatasets of the given type,
5781                          * but they're all marked IGNORE.  We simply insert
5782                          * the new rdataset at the head of the list.
5783                          *
5784                          * Ignored rdatasets cannot occur during loading, so
5785                          * we INSIST on it.
5786                          */
5787                         INSIST(!loading);
5788                         INSIST(rbtversion == NULL ||
5789                                rbtversion->serial >= topheader->serial);
5790                         if (topheader_prev != NULL)
5791                                 topheader_prev->next = newheader;
5792                         else
5793                                 rbtnode->data = newheader;
5794                         newheader->next = topheader->next;
5795                         newheader->down = topheader;
5796                         topheader->next = newheader;
5797                         rbtnode->dirty = 1;
5798                         if (changed != NULL)
5799                                 changed->dirty = ISC_TRUE;
5800                 } else {
5801                         /*
5802                          * No rdatasets of the given type exist at the node.
5803                          */
5804                         newheader->next = rbtnode->data;
5805                         newheader->down = NULL;
5806                         rbtnode->data = newheader;
5807                 }
5808                 idx = newheader->node->locknum;
5809                 if (IS_CACHE(rbtdb)) {
5810                         ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5811                                          newheader, link);
5812                         isc_heap_insert(rbtdb->heaps[idx], newheader);
5813                 } else if (RESIGN(newheader)) {
5814                         resign_insert(rbtdb, idx, newheader);
5815                 }
5816         }
5817
5818         /*
5819          * Check if the node now contains CNAME and other data.
5820          */
5821         if (rbtversion != NULL &&
5822             cname_and_other_data(rbtnode, rbtversion->serial))
5823                 return (DNS_R_CNAMEANDOTHER);
5824
5825         if (addedrdataset != NULL)
5826                 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
5827
5828         return (ISC_R_SUCCESS);
5829 }
5830
5831 static inline isc_boolean_t
5832 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
5833                 rbtdb_rdatatype_t type)
5834 {
5835         if (IS_CACHE(rbtdb)) {
5836                 if (type == dns_rdatatype_dname)
5837                         return (ISC_TRUE);
5838                 else
5839                         return (ISC_FALSE);
5840         } else if (type == dns_rdatatype_dname ||
5841                    (type == dns_rdatatype_ns &&
5842                     (node != rbtdb->origin_node || IS_STUB(rbtdb))))
5843                 return (ISC_TRUE);
5844         return (ISC_FALSE);
5845 }
5846
5847 static inline isc_result_t
5848 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5849            dns_rdataset_t *rdataset)
5850 {
5851         struct noqname *noqname;
5852         isc_mem_t *mctx = rbtdb->common.mctx;
5853         dns_name_t name;
5854         dns_rdataset_t neg, negsig;
5855         isc_result_t result;
5856         isc_region_t r;
5857
5858         dns_name_init(&name, NULL);
5859         dns_rdataset_init(&neg);
5860         dns_rdataset_init(&negsig);
5861
5862         result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
5863         RUNTIME_CHECK(result == ISC_R_SUCCESS);
5864
5865         noqname = isc_mem_get(mctx, sizeof(*noqname));
5866         if (noqname == NULL) {
5867                 result = ISC_R_NOMEMORY;
5868                 goto cleanup;
5869         }
5870         dns_name_init(&noqname->name, NULL);
5871         noqname->neg = NULL;
5872         noqname->negsig = NULL;
5873         noqname->type = neg.type;
5874         result = dns_name_dup(&name, mctx, &noqname->name);
5875         if (result != ISC_R_SUCCESS)
5876                 goto cleanup;
5877         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5878         if (result != ISC_R_SUCCESS)
5879                 goto cleanup;
5880         noqname->neg = r.base;
5881         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5882         if (result != ISC_R_SUCCESS)
5883                 goto cleanup;
5884         noqname->negsig = r.base;
5885         dns_rdataset_disassociate(&neg);
5886         dns_rdataset_disassociate(&negsig);
5887         newheader->noqname = noqname;
5888         return (ISC_R_SUCCESS);
5889
5890 cleanup:
5891         dns_rdataset_disassociate(&neg);
5892         dns_rdataset_disassociate(&negsig);
5893         free_noqname(mctx, &noqname);
5894         return(result);
5895 }
5896
5897 static inline isc_result_t
5898 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5899            dns_rdataset_t *rdataset)
5900 {
5901         struct noqname *closest;
5902         isc_mem_t *mctx = rbtdb->common.mctx;
5903         dns_name_t name;
5904         dns_rdataset_t neg, negsig;
5905         isc_result_t result;
5906         isc_region_t r;
5907
5908         dns_name_init(&name, NULL);
5909         dns_rdataset_init(&neg);
5910         dns_rdataset_init(&negsig);
5911
5912         result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
5913         RUNTIME_CHECK(result == ISC_R_SUCCESS);
5914
5915         closest = isc_mem_get(mctx, sizeof(*closest));
5916         if (closest == NULL) {
5917                 result = ISC_R_NOMEMORY;
5918                 goto cleanup;
5919         }
5920         dns_name_init(&closest->name, NULL);
5921         closest->neg = NULL;
5922         closest->negsig = NULL;
5923         closest->type = neg.type;
5924         result = dns_name_dup(&name, mctx, &closest->name);
5925         if (result != ISC_R_SUCCESS)
5926                 goto cleanup;
5927         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5928         if (result != ISC_R_SUCCESS)
5929                 goto cleanup;
5930         closest->neg = r.base;
5931         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5932         if (result != ISC_R_SUCCESS)
5933                 goto cleanup;
5934         closest->negsig = r.base;
5935         dns_rdataset_disassociate(&neg);
5936         dns_rdataset_disassociate(&negsig);
5937         newheader->closest = closest;
5938         return (ISC_R_SUCCESS);
5939
5940  cleanup:
5941         dns_rdataset_disassociate(&neg);
5942         dns_rdataset_disassociate(&negsig);
5943         free_noqname(mctx, &closest);
5944         return(result);
5945 }
5946
/*
 * Forward declaration.  The functions that follow compare
 * rbtdb->common.methods against the address of this table to decide
 * whether their NSEC3 node-placement REQUIREs apply, so the name has to
 * be visible before the table itself is defined.
 */
5947 static dns_dbmethods_t zone_methods;
5948
/*
 * Add 'rdataset' to 'node' of database 'db'.
 *
 * The rdataset is converted into a slab with room for an rdatasetheader_t
 * at the front, the header is filled in, and the result is passed to add()
 * while the node's lock is held for writing.
 *
 * When 'version' is NULL, a 'now' of 0 is replaced by the current time,
 * the header gets serial 1, and the NXDOMAIN/OPTOUT attributes and any
 * NOQNAME/CLOSEST proofs are copied from 'rdataset'.  When 'version' is
 * non-NULL, 'now' is forced to 0, the header takes the version's serial,
 * and the RESIGN attribute and resign time are copied from 'rdataset'.
 *
 * The tree lock is additionally taken for writing when the type is a
 * delegating one or when the database is a cache whose memory context
 * reports overmem; if it was taken only for the cache cleanup it is
 * released again before add() is called.
 *
 * Returns the failing result if slab construction, addnoqname() or
 * addclosest() fails; otherwise returns whatever add() returned.  If add()
 * returns ISC_R_SUCCESS for a delegating type, the node's find_callback
 * bit is set.
 */
5949 static isc_result_t
5950 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5951             isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
5952             dns_rdataset_t *addedrdataset)
5953 {
5954         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5955         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5956         rbtdb_version_t *rbtversion = version;
5957         isc_region_t region;
5958         rdatasetheader_t *newheader;
5959         rdatasetheader_t *header;
5960         isc_result_t result;
5961         isc_boolean_t delegating;
5962         isc_boolean_t tree_locked = ISC_FALSE;
5963         isc_boolean_t cache_is_overmem = ISC_FALSE;
5964
5965         REQUIRE(VALID_RBTDB(rbtdb));
5966
             /*
              * For a database using zone_methods, NSEC3 rdatasets (by type
              * or by covered type) must go into nodes flagged nsec3, and
              * all other rdatasets into nodes that are not.
              */
5967         if (rbtdb->common.methods == &zone_methods)
5968                 REQUIRE(((rbtnode->nsec3 &&
5969                           (rdataset->type == dns_rdatatype_nsec3 ||
5970                            rdataset->covers == dns_rdatatype_nsec3)) ||
5971                          (!rbtnode->nsec3 &&
5972                            rdataset->type != dns_rdatatype_nsec3 &&
5973                            rdataset->covers != dns_rdatatype_nsec3)));
5974
             /*
              * Without a version, 'now' is the base the TTL is added to
              * below, so a value of 0 is replaced by the current time.
              * With a version, 'now' is zeroed and the TTL is stored as
              * given.
              */
5975         if (rbtversion == NULL) {
5976                 if (now == 0)
5977                         isc_stdtime_get(&now);
5978         } else
5979                 now = 0;
5980
             /*
              * Build the slab, reserving sizeof(rdatasetheader_t) bytes so
              * that the header can be placed at the start of the region.
              */
5981         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
5982                                             &region,
5983                                             sizeof(rdatasetheader_t));
5984         if (result != ISC_R_SUCCESS)
5985                 return (result);
5986
5987         newheader = (rdatasetheader_t *)region.base;
5988         init_rdataset(rbtdb, newheader);
5989         set_ttl(rbtdb, newheader, rdataset->ttl + now);
5990         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
5991                                                 rdataset->covers);
5992         newheader->attributes = 0;
5993         newheader->noqname = NULL;
5994         newheader->closest = NULL;
5995         newheader->count = init_count++;
5996         newheader->trust = rdataset->trust;
5997         newheader->additional_auth = NULL;
5998         newheader->additional_glue = NULL;
5999         newheader->last_used = now;
6000         newheader->node = rbtnode;
6001         if (rbtversion != NULL) {
6002                 newheader->serial = rbtversion->serial;
6003                 now = 0;
6004
6005                 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6006                         newheader->attributes |= RDATASET_ATTR_RESIGN;
6007                         newheader->resign = rdataset->resign;
6008                 } else
6009                         newheader->resign = 0;
6010         } else {
                     /*
                      * No version: carry over the NXDOMAIN/OPTOUT
                      * attributes and copy any NOQNAME/CLOSEST proofs.  A
                      * failed copy releases the new header and aborts the
                      * add.
                      */
6011                 newheader->serial = 1;
6012                 newheader->resign = 0;
6013                 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
6014                         newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6015                 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6016                         newheader->attributes |= RDATASET_ATTR_OPTOUT;
6017                 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6018                         result = addnoqname(rbtdb, newheader, rdataset);
6019                         if (result != ISC_R_SUCCESS) {
6020                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6021                                               newheader);
6022                                 return (result);
6023                         }
6024                 }
6025                 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6026                         result = addclosest(rbtdb, newheader, rdataset);
6027                         if (result != ISC_R_SUCCESS) {
6028                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6029                                               newheader);
6030                                 return (result);
6031                         }
6032                 }
6033         }
6034
6035         /*
6036          * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6037          * just DNAME for the cache), then we need to set the callback bit
6038          * on the node.
6039          */
6040         if (delegating_type(rbtdb, rbtnode, rdataset->type))
6041                 delegating = ISC_TRUE;
6042         else
6043                 delegating = ISC_FALSE;
6044
6045         /*
6046          * If we're adding a delegation type or the DB is a cache in an overmem
6047          * state, hold an exclusive lock on the tree.  In the latter case
6048          * the lock does not necessarily have to be acquired but it will help
6049          * purge stale entries more effectively.
6050          */
6051         if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx))
6052                 cache_is_overmem = ISC_TRUE;
6053         if (delegating || cache_is_overmem) {
6054                 tree_locked = ISC_TRUE;
6055                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6056         }
6057
             /* The overmem purge runs before this node's lock is taken. */
6058         if (cache_is_overmem)
6059                 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6060
6061         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6062                   isc_rwlocktype_write);
6063
             /*
              * When rrset statistics are enabled, mark the header as
              * counted and update the statistics for it.
              */
6064         if (rbtdb->rrsetstats != NULL) {
6065                 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6066                 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6067         }
6068
6069         if (IS_CACHE(rbtdb)) {
6070                 if (tree_locked)
6071                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6072
                     /*
                      * Look at element 1 of the heap for this lock bucket
                      * (presumably the entry with the smallest TTL; confirm
                      * against the heap's comparison function) and expire
                      * it if its TTL is at or before now - RBTDB_VIRTUAL.
                      */
6073                 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6074                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6075                         expire_header(rbtdb, header, tree_locked);
6076
6077                 /*
6078                  * If we've been holding a write lock on the tree just for
6079                  * cleaning, we can release it now.  However, we still need the
6080                  * node lock.
6081                  */
6082                 if (tree_locked && !delegating) {
6083                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6084                         tree_locked = ISC_FALSE;
6085                 }
6086         }
6087
             /*
              * Hand the header to add() with 'loading' set to ISC_FALSE.
              * The callback bit is set only when the add succeeded.
              */
6088         result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
6089                      addedrdataset, now);
6090         if (result == ISC_R_SUCCESS && delegating)
6091                 rbtnode->find_callback = 1;
6092
6093         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6094                     isc_rwlocktype_write);
6095
6096         if (tree_locked)
6097                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6098
6099         /*
6100          * Update the zone's secure status.  If version is non-NULL
6101          * this is deferred until closeversion() is called.
6102          */
6103         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6104                 iszonesecure(db, version, rbtdb->origin_node);
6105
6106         return (result);
6107 }
6108
6109 static isc_result_t
6110 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6111                  dns_rdataset_t *rdataset, unsigned int options,
6112                  dns_rdataset_t *newrdataset)
6113 {
6114         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6115         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6116         rbtdb_version_t *rbtversion = version;
6117         rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6118         unsigned char *subresult;
6119         isc_region_t region;
6120         isc_result_t result;
6121         rbtdb_changed_t *changed;
6122
6123         REQUIRE(VALID_RBTDB(rbtdb));
6124
6125         if (rbtdb->common.methods == &zone_methods)
6126                 REQUIRE(((rbtnode->nsec3 &&
6127                           (rdataset->type == dns_rdatatype_nsec3 ||
6128                            rdataset->covers == dns_rdatatype_nsec3)) ||
6129                          (!rbtnode->nsec3 &&
6130                            rdataset->type != dns_rdatatype_nsec3 &&
6131                            rdataset->covers != dns_rdatatype_nsec3)));
6132
6133         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6134                                             &region,
6135                                             sizeof(rdatasetheader_t));
6136         if (result != ISC_R_SUCCESS)
6137                 return (result);
6138         newheader = (rdatasetheader_t *)region.base;
6139         init_rdataset(rbtdb, newheader);
6140         set_ttl(rbtdb, newheader, rdataset->ttl);
6141         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6142                                                 rdataset->covers);
6143         newheader->attributes = 0;
6144         newheader->serial = rbtversion->serial;
6145         newheader->trust = 0;
6146         newheader->noqname = NULL;
6147         newheader->closest = NULL;
6148         newheader->count = init_count++;
6149         newheader->additional_auth = NULL;
6150         newheader->additional_glue = NULL;
6151         newheader->last_used = 0;
6152         newheader->node = rbtnode;
6153         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6154                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6155                 newheader->resign = rdataset->resign;
6156         } else
6157                 newheader->resign = 0;
6158
6159         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6160                   isc_rwlocktype_write);
6161
6162         changed = add_changed(rbtdb, rbtversion, rbtnode);
6163         if (changed == NULL) {
6164                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6165                 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6166                             isc_rwlocktype_write);
6167                 return (ISC_R_NOMEMORY);
6168         }
6169
6170         topheader_prev = NULL;
6171         for (topheader = rbtnode->data;
6172              topheader != NULL;
6173              topheader = topheader->next) {
6174                 if (topheader->type == newheader->type)
6175                         break;
6176                 topheader_prev = topheader;
6177         }
6178         /*
6179          * If header isn't NULL, we've found the right type.  There may be
6180          * IGNORE rdatasets between the top of the chain and the first real
6181          * data.  We skip over them.
6182          */
6183         header = topheader;
6184         while (header != NULL && IGNORE(header))
6185                 header = header->down;
6186         if (header != NULL && EXISTS(header)) {
6187                 unsigned int flags = 0;
6188                 subresult = NULL;
6189                 result = ISC_R_SUCCESS;
6190                 if ((options & DNS_DBSUB_EXACT) != 0) {
6191                         flags |= DNS_RDATASLAB_EXACT;
6192                         if (newheader->rdh_ttl != header->rdh_ttl)
6193                                 result = DNS_R_NOTEXACT;
6194                 }
6195                 if (result == ISC_R_SUCCESS)
6196                         result = dns_rdataslab_subtract(
6197                                         (unsigned char *)header,
6198                                         (unsigned char *)newheader,
6199                                         (unsigned int)(sizeof(*newheader)),
6200                                         rbtdb->common.mctx,
6201                                         rbtdb->common.rdclass,
6202                                         (dns_rdatatype_t)header->type,
6203                                         flags, &subresult);
6204                 if (result == ISC_R_SUCCESS) {
6205                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6206                         newheader = (rdatasetheader_t *)subresult;
6207                         init_rdataset(rbtdb, newheader);
6208                         /*
6209                          * We have to set the serial since the rdataslab
6210                          * subtraction routine copies the reserved portion of
6211                          * header, not newheader.
6212                          */
6213                         newheader->serial = rbtversion->serial;
6214                         /*
6215                          * XXXJT: dns_rdataslab_subtract() copied the pointers
6216                          * to additional info.  We need to clear these fields
6217                          * to avoid having duplicated references.
6218                          */
6219                         newheader->additional_auth = NULL;
6220                         newheader->additional_glue = NULL;
6221                 } else if (result == DNS_R_NXRRSET) {
6222                         /*
6223                          * This subtraction would remove all of the rdata;
6224                          * add a nonexistent header instead.
6225                          */
6226                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6227                         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6228                         if (newheader == NULL) {
6229                                 result = ISC_R_NOMEMORY;
6230                                 goto unlock;
6231                         }
6232                         set_ttl(rbtdb, newheader, 0);
6233                         newheader->type = topheader->type;
6234                         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6235                         newheader->trust = 0;
6236                         newheader->serial = rbtversion->serial;
6237                         newheader->noqname = NULL;
6238                         newheader->closest = NULL;
6239                         newheader->count = 0;
6240                         newheader->additional_auth = NULL;
6241                         newheader->additional_glue = NULL;
6242                         newheader->node = rbtnode;
6243                         newheader->resign = 0;
6244                         newheader->last_used = 0;
6245                 } else {
6246                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6247                         goto unlock;
6248                 }
6249
6250                 /*
6251                  * If we're here, we want to link newheader in front of
6252                  * topheader.
6253                  */
6254                 INSIST(rbtversion->serial >= topheader->serial);
6255                 if (topheader_prev != NULL)
6256                         topheader_prev->next = newheader;
6257                 else
6258                         rbtnode->data = newheader;
6259                 newheader->next = topheader->next;
6260                 newheader->down = topheader;
6261                 topheader->next = newheader;
6262                 rbtnode->dirty = 1;
6263                 changed->dirty = ISC_TRUE;
6264         } else {
6265                 /*
6266                  * The rdataset doesn't exist, so we don't need to do anything
6267                  * to satisfy the deletion request.
6268                  */
6269                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6270                 if ((options & DNS_DBSUB_EXACT) != 0)
6271                         result = DNS_R_NOTEXACT;
6272                 else
6273                         result = DNS_R_UNCHANGED;
6274         }
6275
6276         if (result == ISC_R_SUCCESS && newrdataset != NULL)
6277                 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6278
6279  unlock:
6280         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6281                     isc_rwlocktype_write);
6282
6283         /*
6284          * Update the zone's secure status.  If version is non-NULL
6285          * this is deferred until closeversion() is called.
6286          */
6287         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6288                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6289
6290         return (result);
6291 }
6292
6293 static isc_result_t
6294 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6295                dns_rdatatype_t type, dns_rdatatype_t covers)
6296 {
6297         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6298         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6299         rbtdb_version_t *rbtversion = version;
6300         isc_result_t result;
6301         rdatasetheader_t *newheader;
6302
6303         REQUIRE(VALID_RBTDB(rbtdb));
6304
6305         if (type == dns_rdatatype_any)
6306                 return (ISC_R_NOTIMPLEMENTED);
6307         if (type == dns_rdatatype_rrsig && covers == 0)
6308                 return (ISC_R_NOTIMPLEMENTED);
6309
6310         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6311         if (newheader == NULL)
6312                 return (ISC_R_NOMEMORY);
6313         set_ttl(rbtdb, newheader, 0);
6314         newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6315         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6316         newheader->trust = 0;
6317         newheader->noqname = NULL;
6318         newheader->closest = NULL;
6319         newheader->additional_auth = NULL;
6320         newheader->additional_glue = NULL;
6321         if (rbtversion != NULL)
6322                 newheader->serial = rbtversion->serial;
6323         else
6324                 newheader->serial = 0;
6325         newheader->count = 0;
6326         newheader->last_used = 0;
6327         newheader->node = rbtnode;
6328
6329         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6330                   isc_rwlocktype_write);
6331
6332         result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6333                      ISC_FALSE, NULL, 0);
6334
6335         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6336                     isc_rwlocktype_write);
6337
6338         /*
6339          * Update the zone's secure status.  If version is non-NULL
6340          * this is deferred until closeversion() is called.
6341          */
6342         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6343                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6344
6345         return (result);
6346 }
6347
6348 static isc_result_t
6349 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6350         rbtdb_load_t *loadctx = arg;
6351         dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6352         dns_rbtnode_t *node;
6353         isc_result_t result;
6354         isc_region_t region;
6355         rdatasetheader_t *newheader;
6356
6357         /*
6358          * This routine does no node locking.  See comments in
6359          * 'load' below for more information on loading and
6360          * locking.
6361          */
6362
6363
6364         /*
6365          * SOA records are only allowed at top of zone.
6366          */
6367         if (rdataset->type == dns_rdatatype_soa &&
6368             !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6369                 return (DNS_R_NOTZONETOP);
6370
6371         if (rdataset->type != dns_rdatatype_nsec3 &&
6372             rdataset->covers != dns_rdatatype_nsec3)
6373                 add_empty_wildcards(rbtdb, name);
6374
6375         if (dns_name_iswildcard(name)) {
6376                 /*
6377                  * NS record owners cannot legally be wild cards.
6378                  */
6379                 if (rdataset->type == dns_rdatatype_ns)
6380                         return (DNS_R_INVALIDNS);
6381                 /*
6382                  * NSEC3 record owners cannot legally be wild cards.
6383                  */
6384                 if (rdataset->type == dns_rdatatype_nsec3)
6385                         return (DNS_R_INVALIDNSEC3);
6386                 result = add_wildcard_magic(rbtdb, name);
6387                 if (result != ISC_R_SUCCESS)
6388                         return (result);
6389         }
6390
6391         node = NULL;
6392         if (rdataset->type == dns_rdatatype_nsec3 ||
6393             rdataset->covers == dns_rdatatype_nsec3) {
6394                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6395                 if (result == ISC_R_SUCCESS)
6396                         node->nsec3 = 1;
6397         } else {
6398                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
6399                 if (result == ISC_R_SUCCESS)
6400                         node->nsec3 = 0;
6401         }
6402         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
6403                 return (result);
6404         if (result != ISC_R_EXISTS) {
6405                 dns_name_t foundname;
6406                 dns_name_init(&foundname, NULL);
6407                 dns_rbt_namefromnode(node, &foundname);
6408 #ifdef DNS_RBT_USEHASH
6409                 node->locknum = node->hashval % rbtdb->node_lock_count;
6410 #else
6411                 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6412                         rbtdb->node_lock_count;
6413 #endif
6414         }
6415
6416         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6417                                             &region,
6418                                             sizeof(rdatasetheader_t));
6419         if (result != ISC_R_SUCCESS)
6420                 return (result);
6421         newheader = (rdatasetheader_t *)region.base;
6422         init_rdataset(rbtdb, newheader);
6423         set_ttl(rbtdb, newheader,
6424                 rdataset->ttl + loadctx->now); /* XXX overflow check */
6425         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6426                                                 rdataset->covers);
6427         newheader->attributes = 0;
6428         newheader->trust = rdataset->trust;
6429         newheader->serial = 1;
6430         newheader->noqname = NULL;
6431         newheader->closest = NULL;
6432         newheader->count = init_count++;
6433         newheader->additional_auth = NULL;
6434         newheader->additional_glue = NULL;
6435         newheader->last_used = 0;
6436         newheader->node = node;
6437         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6438                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6439                 newheader->resign = rdataset->resign;
6440         } else
6441                 newheader->resign = 0;
6442
6443         result = add(rbtdb, node, rbtdb->current_version, newheader,
6444                      DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
6445         if (result == ISC_R_SUCCESS &&
6446             delegating_type(rbtdb, node, rdataset->type))
6447                 node->find_callback = 1;
6448         else if (result == DNS_R_UNCHANGED)
6449                 result = ISC_R_SUCCESS;
6450
6451         return (result);
6452 }
6453
6454 static isc_result_t
6455 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6456         rbtdb_load_t *loadctx;
6457         dns_rbtdb_t *rbtdb;
6458
6459         rbtdb = (dns_rbtdb_t *)db;
6460
6461         REQUIRE(VALID_RBTDB(rbtdb));
6462
6463         loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
6464         if (loadctx == NULL)
6465                 return (ISC_R_NOMEMORY);
6466
6467         loadctx->rbtdb = rbtdb;
6468         if (IS_CACHE(rbtdb))
6469                 isc_stdtime_get(&loadctx->now);
6470         else
6471                 loadctx->now = 0;
6472
6473         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6474
6475         REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
6476                 == 0);
6477         rbtdb->attributes |= RBTDB_ATTR_LOADING;
6478
6479         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6480
6481         *addp = loading_addrdataset;
6482         *dbloadp = loadctx;
6483
6484         return (ISC_R_SUCCESS);
6485 }
6486
6487 static isc_result_t
6488 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
6489         rbtdb_load_t *loadctx;
6490         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6491
6492         REQUIRE(VALID_RBTDB(rbtdb));
6493         REQUIRE(dbloadp != NULL);
6494         loadctx = *dbloadp;
6495         REQUIRE(loadctx->rbtdb == rbtdb);
6496
6497         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6498
6499         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
6500         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
6501
6502         rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
6503         rbtdb->attributes |= RBTDB_ATTR_LOADED;
6504
6505         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6506
6507         /*
6508          * If there's a KEY rdataset at the zone origin containing a
6509          * zone key, we consider the zone secure.
6510          */
6511         if (! IS_CACHE(rbtdb))
6512                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6513
6514         *dbloadp = NULL;
6515
6516         isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
6517
6518         return (ISC_R_SUCCESS);
6519 }
6520
6521 static isc_result_t
6522 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
6523      dns_masterformat_t masterformat) {
6524         dns_rbtdb_t *rbtdb;
6525
6526         rbtdb = (dns_rbtdb_t *)db;
6527
6528         REQUIRE(VALID_RBTDB(rbtdb));
6529
6530         return (dns_master_dump2(rbtdb->common.mctx, db, version,
6531                                  &dns_master_style_default,
6532                                  filename, masterformat));
6533 }
6534
6535 static void
6536 delete_callback(void *data, void *arg) {
6537         dns_rbtdb_t *rbtdb = arg;
6538         rdatasetheader_t *current, *next;
6539         unsigned int locknum;
6540
6541         current = data;
6542         locknum = current->node->locknum;
6543         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6544         while (current != NULL) {
6545                 next = current->next;
6546                 free_rdataset(rbtdb, rbtdb->common.mctx, current);
6547                 current = next;
6548         }
6549         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6550 }
6551
6552 static isc_boolean_t
6553 issecure(dns_db_t *db) {
6554         dns_rbtdb_t *rbtdb;
6555         isc_boolean_t secure;
6556
6557         rbtdb = (dns_rbtdb_t *)db;
6558
6559         REQUIRE(VALID_RBTDB(rbtdb));
6560
6561         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6562         secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
6563         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6564
6565         return (secure);
6566 }
6567
6568 static isc_boolean_t
6569 isdnssec(dns_db_t *db) {
6570         dns_rbtdb_t *rbtdb;
6571         isc_boolean_t dnssec;
6572
6573         rbtdb = (dns_rbtdb_t *)db;
6574
6575         REQUIRE(VALID_RBTDB(rbtdb));
6576
6577         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6578         dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
6579         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6580
6581         return (dnssec);
6582 }
6583
6584 static unsigned int
6585 nodecount(dns_db_t *db) {
6586         dns_rbtdb_t *rbtdb;
6587         unsigned int count;
6588
6589         rbtdb = (dns_rbtdb_t *)db;
6590
6591         REQUIRE(VALID_RBTDB(rbtdb));
6592
6593         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6594         count = dns_rbt_nodecount(rbtdb->tree);
6595         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6596
6597         return (count);
6598 }
6599
6600 static void
6601 settask(dns_db_t *db, isc_task_t *task) {
6602         dns_rbtdb_t *rbtdb;
6603
6604         rbtdb = (dns_rbtdb_t *)db;
6605
6606         REQUIRE(VALID_RBTDB(rbtdb));
6607
6608         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6609         if (rbtdb->task != NULL)
6610                 isc_task_detach(&rbtdb->task);
6611         if (task != NULL)
6612                 isc_task_attach(task, &rbtdb->task);
6613         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6614 }
6615
6616 static isc_boolean_t
6617 ispersistent(dns_db_t *db) {
6618         UNUSED(db);
6619         return (ISC_FALSE);
6620 }
6621
6622 static isc_result_t
6623 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
6624         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6625         dns_rbtnode_t *onode;
6626         isc_result_t result = ISC_R_SUCCESS;
6627
6628         REQUIRE(VALID_RBTDB(rbtdb));
6629         REQUIRE(nodep != NULL && *nodep == NULL);
6630
6631         /* Note that the access to origin_node doesn't require a DB lock */
6632         onode = (dns_rbtnode_t *)rbtdb->origin_node;
6633         if (onode != NULL) {
6634                 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
6635                 new_reference(rbtdb, onode);
6636                 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
6637
6638                 *nodep = rbtdb->origin_node;
6639         } else {
6640                 INSIST(IS_CACHE(rbtdb));
6641                 result = ISC_R_NOTFOUND;
6642         }
6643
6644         return (result);
6645 }
6646
6647 static isc_result_t
6648 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
6649                    isc_uint8_t *flags, isc_uint16_t *iterations,
6650                    unsigned char *salt, size_t *salt_length)
6651 {
6652         dns_rbtdb_t *rbtdb;
6653         isc_result_t result = ISC_R_NOTFOUND;
6654         rbtdb_version_t *rbtversion = version;
6655
6656         rbtdb = (dns_rbtdb_t *)db;
6657
6658         REQUIRE(VALID_RBTDB(rbtdb));
6659
6660         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6661
6662         if (rbtversion == NULL)
6663                 rbtversion = rbtdb->current_version;
6664
6665         if (rbtversion->havensec3) {
6666                 if (hash != NULL)
6667                         *hash = rbtversion->hash;
6668                 if (salt != NULL && salt_length != NULL) {
6669                         REQUIRE(*salt_length >= rbtversion->salt_length);
6670                         memcpy(salt, rbtversion->salt, rbtversion->salt_length);
6671                 }
6672                 if (salt_length != NULL)
6673                         *salt_length = rbtversion->salt_length;
6674                 if (iterations != NULL)
6675                         *iterations = rbtversion->iterations;
6676                 if (flags != NULL)
6677                         *flags = rbtversion->flags;
6678                 result = ISC_R_SUCCESS;
6679         }
6680         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6681
6682         return (result);
6683 }
6684
6685 static isc_result_t
6686 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
6687         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6688         isc_stdtime_t oldresign;
6689         isc_result_t result = ISC_R_SUCCESS;
6690         rdatasetheader_t *header;
6691
6692         REQUIRE(VALID_RBTDB(rbtdb));
6693         REQUIRE(!IS_CACHE(rbtdb));
6694         REQUIRE(rdataset != NULL);
6695
6696         header = rdataset->private3;
6697         header--;
6698
6699         NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6700                   isc_rwlocktype_write);
6701
6702         oldresign = header->resign;
6703         header->resign = resign;
6704         if (header->heap_index != 0) {
6705                 INSIST(RESIGN(header));
6706                 if (resign == 0) {
6707                         isc_heap_delete(rbtdb->heaps[header->node->locknum],
6708                                         header->heap_index);
6709                         header->heap_index = 0;
6710                 } else if (resign < oldresign)
6711                         isc_heap_increased(rbtdb->heaps[header->node->locknum],
6712                                            header->heap_index);
6713                 else
6714                         isc_heap_decreased(rbtdb->heaps[header->node->locknum],
6715                                            header->heap_index);
6716         } else if (resign && header->heap_index == 0) {
6717                 header->attributes |= RDATASET_ATTR_RESIGN;
6718                 result = resign_insert(rbtdb, header->node->locknum, header);
6719         }
6720         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6721                     isc_rwlocktype_write);
6722         return (result);
6723 }
6724
6725 static isc_result_t
6726 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
6727                dns_name_t *foundname)
6728 {
6729         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6730         rdatasetheader_t *header = NULL, *this;
6731         unsigned int i;
6732         isc_result_t result = ISC_R_NOTFOUND;
6733         unsigned int locknum;
6734
6735         REQUIRE(VALID_RBTDB(rbtdb));
6736
6737         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
6738
6739         for (i = 0; i < rbtdb->node_lock_count; i++) {
6740                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
6741                 this = isc_heap_element(rbtdb->heaps[i], 1);
6742                 if (this == NULL) {
6743                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6744                                     isc_rwlocktype_read);
6745                         continue;
6746                 }
6747                 if (header == NULL)
6748                         header = this;
6749                 else if (isc_serial_lt(this->resign, header->resign)) {
6750                         locknum = header->node->locknum;
6751                         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
6752                                     isc_rwlocktype_read);
6753                         header = this;
6754                 } else
6755                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6756                                     isc_rwlocktype_read);
6757         }
6758
6759         if (header == NULL)
6760                 goto unlock;
6761
6762         bind_rdataset(rbtdb, header->node, header, 0, rdataset);
6763
6764         if (foundname != NULL)
6765                 dns_rbt_fullnamefromnode(header->node, foundname);
6766
6767         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6768                     isc_rwlocktype_read);
6769
6770         result = ISC_R_SUCCESS;
6771
6772  unlock:
6773         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
6774
6775         return (result);
6776 }
6777
6778 static void
6779 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
6780 {
6781         rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
6782         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6783         dns_rbtnode_t *node;
6784         rdatasetheader_t *header;
6785
6786         REQUIRE(VALID_RBTDB(rbtdb));
6787         REQUIRE(rdataset != NULL);
6788         REQUIRE(rbtdb->future_version == rbtversion);
6789         REQUIRE(rbtversion->writer);
6790
6791         node = rdataset->private2;
6792         header = rdataset->private3;
6793         header--;
6794
6795         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6796         NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
6797                   isc_rwlocktype_write);
6798         /*
6799          * Delete from heap and save to re-signed list so that it can
6800          * be restored if we backout of this change.
6801          */
6802         new_reference(rbtdb, node);
6803         isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
6804         header->heap_index = 0;
6805         ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
6806
6807         NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
6808                     isc_rwlocktype_write);
6809         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6810 }
6811
6812 static dns_stats_t *
6813 getrrsetstats(dns_db_t *db) {
6814         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6815
6816         REQUIRE(VALID_RBTDB(rbtdb));
6817         REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
6818
6819         return (rbtdb->rrsetstats);
6820 }
6821
6822 static dns_dbmethods_t zone_methods = {
6823         attach,
6824         detach,
6825         beginload,
6826         endload,
6827         dump,
6828         currentversion,
6829         newversion,
6830         attachversion,
6831         closeversion,
6832         findnode,
6833         zone_find,
6834         zone_findzonecut,
6835         attachnode,
6836         detachnode,
6837         expirenode,
6838         printnode,
6839         createiterator,
6840         zone_findrdataset,
6841         allrdatasets,
6842         addrdataset,
6843         subtractrdataset,
6844         deleterdataset,
6845         issecure,
6846         nodecount,
6847         ispersistent,
6848         overmem,
6849         settask,
6850         getoriginnode,
6851         NULL,
6852         getnsec3parameters,
6853         findnsec3node,
6854         setsigningtime,
6855         getsigningtime,
6856         resigned,
6857         isdnssec,
6858         NULL
6859 };
6860
6861 static dns_dbmethods_t cache_methods = {
6862         attach,
6863         detach,
6864         beginload,
6865         endload,
6866         dump,
6867         currentversion,
6868         newversion,
6869         attachversion,
6870         closeversion,
6871         findnode,
6872         cache_find,
6873         cache_findzonecut,
6874         attachnode,
6875         detachnode,
6876         expirenode,
6877         printnode,
6878         createiterator,
6879         cache_findrdataset,
6880         allrdatasets,
6881         addrdataset,
6882         subtractrdataset,
6883         deleterdataset,
6884         issecure,
6885         nodecount,
6886         ispersistent,
6887         overmem,
6888         settask,
6889         getoriginnode,
6890         NULL,
6891         NULL,
6892         NULL,
6893         NULL,
6894         NULL,
6895         NULL,
6896         isdnssec,
6897         getrrsetstats
6898 };
6899
6900 isc_result_t
6901 #ifdef DNS_RBTDB_VERSION64
6902 dns_rbtdb64_create
6903 #else
6904 dns_rbtdb_create
6905 #endif
6906                 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
6907                  dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
6908                  void *driverarg, dns_db_t **dbp)
6909 {
6910         dns_rbtdb_t *rbtdb;
6911         isc_result_t result;
6912         int i;
6913         dns_name_t name;
6914         isc_boolean_t (*sooner)(void *, void *);
6915
6916         /* Keep the compiler happy. */
6917         UNUSED(argc);
6918         UNUSED(argv);
6919         UNUSED(driverarg);
6920
6921         rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
6922         if (rbtdb == NULL)
6923                 return (ISC_R_NOMEMORY);
6924
6925         memset(rbtdb, '\0', sizeof(*rbtdb));
6926         dns_name_init(&rbtdb->common.origin, NULL);
6927         rbtdb->common.attributes = 0;
6928         if (type == dns_dbtype_cache) {
6929                 rbtdb->common.methods = &cache_methods;
6930                 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
6931         } else if (type == dns_dbtype_stub) {
6932                 rbtdb->common.methods = &zone_methods;
6933                 rbtdb->common.attributes |= DNS_DBATTR_STUB;
6934         } else
6935                 rbtdb->common.methods = &zone_methods;
6936         rbtdb->common.rdclass = rdclass;
6937         rbtdb->common.mctx = NULL;
6938
6939         result = RBTDB_INITLOCK(&rbtdb->lock);
6940         if (result != ISC_R_SUCCESS)
6941                 goto cleanup_rbtdb;
6942
6943         result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
6944         if (result != ISC_R_SUCCESS)
6945                 goto cleanup_lock;
6946
6947         /*
6948          * Initialize node_lock_count in a generic way to support future
6949          * extension which allows the user to specify this value on creation.
6950          * Note that when specified for a cache DB it must be larger than 1
6951          * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
6952          */
6953         if (rbtdb->node_lock_count == 0) {
6954                 if (IS_CACHE(rbtdb))
6955                         rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
6956                 else
6957                         rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
6958         } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
6959                 result = ISC_R_RANGE;
6960                 goto cleanup_tree_lock;
6961         }
6962         INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
6963         rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
6964                                         sizeof(rbtdb_nodelock_t));
6965         if (rbtdb->node_locks == NULL) {
6966                 result = ISC_R_NOMEMORY;
6967                 goto cleanup_tree_lock;
6968         }
6969
6970         rbtdb->rrsetstats = NULL;
6971         if (IS_CACHE(rbtdb)) {
6972                 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
6973                 if (result != ISC_R_SUCCESS)
6974                         goto cleanup_node_locks;
6975                 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
6976                                                sizeof(rdatasetheaderlist_t));
6977                 if (rbtdb->rdatasets == NULL) {
6978                         result = ISC_R_NOMEMORY;
6979                         goto cleanup_rrsetstats;
6980                 }
6981                 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6982                         ISC_LIST_INIT(rbtdb->rdatasets[i]);
6983         } else
6984                 rbtdb->rdatasets = NULL;
6985
6986         /*
6987          * Create the heaps.
6988          */
6989         rbtdb->heaps = isc_mem_get(mctx, rbtdb->node_lock_count *
6990                                    sizeof(isc_heap_t *));
6991         if (rbtdb->heaps == NULL) {
6992                 result = ISC_R_NOMEMORY;
6993                 goto cleanup_rdatasets;
6994         }
6995         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6996                 rbtdb->heaps[i] = NULL;
6997         sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
6998         for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
6999                 result = isc_heap_create(mctx, sooner, set_index, 0,
7000                                          &rbtdb->heaps[i]);
7001                 if (result != ISC_R_SUCCESS)
7002                         goto cleanup_heaps;
7003         }
7004
7005         /*
7006          * Create deadnode lists.
7007          */
7008         rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
7009                                        sizeof(rbtnodelist_t));
7010         if (rbtdb->deadnodes == NULL) {
7011                 result = ISC_R_NOMEMORY;
7012                 goto cleanup_heaps;
7013         }
7014         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7015                 ISC_LIST_INIT(rbtdb->deadnodes[i]);
7016
7017         rbtdb->active = rbtdb->node_lock_count;
7018
7019         for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
7020                 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
7021                 if (result == ISC_R_SUCCESS) {
7022                         result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
7023                         if (result != ISC_R_SUCCESS)
7024                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7025                 }
7026                 if (result != ISC_R_SUCCESS) {
7027                         while (i-- > 0) {
7028                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7029                                 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7030                                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7031                         }
7032                         goto cleanup_deadnodes;
7033                 }
7034                 rbtdb->node_locks[i].exiting = ISC_FALSE;
7035         }
7036
7037         /*
7038          * Attach to the mctx.  The database will persist so long as there
7039          * are references to it, and attaching to the mctx ensures that our
7040          * mctx won't disappear out from under us.
7041          */
7042         isc_mem_attach(mctx, &rbtdb->common.mctx);
7043
7044         /*
7045          * Must be initialized before free_rbtdb() is called.
7046          */
7047         isc_ondestroy_init(&rbtdb->common.ondest);
7048
7049         /*
7050          * Make a copy of the origin name.
7051          */
7052         result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7053         if (result != ISC_R_SUCCESS) {
7054                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7055                 return (result);
7056         }
7057
7058         /*
7059          * Make the Red-Black Trees.
7060          */
7061         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7062         if (result != ISC_R_SUCCESS) {
7063                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7064                 return (result);
7065         }
7066
7067         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7068         if (result != ISC_R_SUCCESS) {
7069                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7070                 return (result);
7071         }
7072
7073         /*
7074          * In order to set the node callback bit correctly in zone databases,
7075          * we need to know if the node has the origin name of the zone.
7076          * In loading_addrdataset() we could simply compare the new name
7077          * to the origin name, but this is expensive.  Also, we don't know the
7078          * node name in addrdataset(), so we need another way of knowing the
7079          * zone's top.
7080          *
7081          * We now explicitly create a node for the zone's origin, and then
7082          * we simply remember the node's address.  This is safe, because
7083          * the top-of-zone node can never be deleted, nor can its address
7084          * change.
7085          */
7086         if (!IS_CACHE(rbtdb)) {
7087                 dns_rbtnode_t *nsec3node;
7088
7089                 rbtdb->origin_node = NULL;
7090                 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7091                                          &rbtdb->origin_node);
7092                 if (result != ISC_R_SUCCESS) {
7093                         INSIST(result != ISC_R_EXISTS);
7094                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7095                         return (result);
7096                 }
7097                 rbtdb->origin_node->nsec3 = 0;
7098                 /*
7099                  * We need to give the origin node the right locknum.
7100                  */
7101                 dns_name_init(&name, NULL);
7102                 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7103 #ifdef DNS_RBT_USEHASH
7104                 rbtdb->origin_node->locknum =
7105                         rbtdb->origin_node->hashval %
7106                         rbtdb->node_lock_count;
7107 #else
7108                 rbtdb->origin_node->locknum =
7109                         dns_name_hash(&name, ISC_TRUE) %
7110                         rbtdb->node_lock_count;
7111 #endif
7112                 /*
7113                  * Add an apex node to the NSEC3 tree so that NSEC3 searches
7114                  * return partial matches when there is only a single NSEC3
7115                  * record in the tree.
7116                  */
7117                 nsec3node = NULL;
7118                 result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin,
7119                                          &nsec3node);
7120                 if (result != ISC_R_SUCCESS) {
7121                         INSIST(result != ISC_R_EXISTS);
7122                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7123                         return (result);
7124                 }
7125                 nsec3node->nsec3 = 1;
7126                 /*
7127                  * We need to give the nsec3 origin node the right locknum.
7128                  */
7129                 dns_name_init(&name, NULL);
7130                 dns_rbt_namefromnode(nsec3node, &name);
7131 #ifdef DNS_RBT_USEHASH
7132                 nsec3node->locknum = nsec3node->hashval %
7133                         rbtdb->node_lock_count;
7134 #else
7135                 nsec3node->locknum = dns_name_hash(&name, ISC_TRUE) %
7136                         rbtdb->node_lock_count;
7137 #endif
7138         }
7139
7140         /*
7141          * Misc. Initialization.
7142          */
7143         result = isc_refcount_init(&rbtdb->references, 1);
7144         if (result != ISC_R_SUCCESS) {
7145                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7146                 return (result);
7147         }
7148         rbtdb->attributes = 0;
7149         rbtdb->task = NULL;
7150
7151         /*
7152          * Version Initialization.
7153          */
7154         rbtdb->current_serial = 1;
7155         rbtdb->least_serial = 1;
7156         rbtdb->next_serial = 2;
7157         rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7158         if (rbtdb->current_version == NULL) {
7159                 isc_refcount_decrement(&rbtdb->references, NULL);
7160                 isc_refcount_destroy(&rbtdb->references);
7161                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7162                 return (ISC_R_NOMEMORY);
7163         }
7164         rbtdb->current_version->secure = dns_db_insecure;
7165         rbtdb->current_version->havensec3 = ISC_FALSE;
7166         rbtdb->current_version->flags = 0;
7167         rbtdb->current_version->iterations = 0;
7168         rbtdb->current_version->hash = 0;
7169         rbtdb->current_version->salt_length = 0;
7170         memset(rbtdb->current_version->salt, 0,
7171                sizeof(rbtdb->current_version->salt));
7172         rbtdb->future_version = NULL;
7173         ISC_LIST_INIT(rbtdb->open_versions);
7174         /*
7175          * Keep the current version in the open list so that list operation
7176          * won't happen in normal lookup operations.
7177          */
7178         PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7179
7180         rbtdb->common.magic = DNS_DB_MAGIC;
7181         rbtdb->common.impmagic = RBTDB_MAGIC;
7182
7183         *dbp = (dns_db_t *)rbtdb;
7184
7185         return (ISC_R_SUCCESS);
7186
7187  cleanup_deadnodes:
7188         isc_mem_put(mctx, rbtdb->deadnodes,
7189                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7190
7191  cleanup_heaps:
7192         if (rbtdb->heaps != NULL) {
7193                 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7194                         if (rbtdb->heaps[i] != NULL)
7195                                 isc_heap_destroy(&rbtdb->heaps[i]);
7196                 isc_mem_put(mctx, rbtdb->heaps,
7197                             rbtdb->node_lock_count * sizeof(isc_heap_t *));
7198         }
7199
7200  cleanup_rdatasets:
7201         if (rbtdb->rdatasets != NULL)
7202                 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7203                             sizeof(rdatasetheaderlist_t));
7204  cleanup_rrsetstats:
7205         if (rbtdb->rrsetstats != NULL)
7206                 dns_stats_detach(&rbtdb->rrsetstats);
7207
7208  cleanup_node_locks:
7209         isc_mem_put(mctx, rbtdb->node_locks,
7210                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7211
7212  cleanup_tree_lock:
7213         isc_rwlock_destroy(&rbtdb->tree_lock);
7214
7215  cleanup_lock:
7216         RBTDB_DESTROYLOCK(&rbtdb->lock);
7217
7218  cleanup_rbtdb:
7219         isc_mem_put(mctx, rbtdb,  sizeof(*rbtdb));
7220         return (result);
7221 }
7222
7223
7224 /*
7225  * Slabbed Rdataset Methods
7226  */
7227
7228 static void
7229 rdataset_disassociate(dns_rdataset_t *rdataset) {
7230         dns_db_t *db = rdataset->private1;
7231         dns_dbnode_t *node = rdataset->private2;
7232
7233         detachnode(db, &node);
7234 }
7235
7236 static isc_result_t
7237 rdataset_first(dns_rdataset_t *rdataset) {
7238         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7239         unsigned int count;
7240
7241         count = raw[0] * 256 + raw[1];
7242         if (count == 0) {
7243                 rdataset->private5 = NULL;
7244                 return (ISC_R_NOMORE);
7245         }
7246
7247 #if DNS_RDATASET_FIXED
7248         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7249                 raw += 2 + (4 * count);
7250         else
7251 #endif
7252                 raw += 2;
7253
7254         /*
7255          * The privateuint4 field is the number of rdata beyond the
7256          * cursor position, so we decrement the total count by one
7257          * before storing it.
7258          *
7259          * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7260          * first record.  If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7261          * to the first entry in the offset table.
7262          */
7263         count--;
7264         rdataset->privateuint4 = count;
7265         rdataset->private5 = raw;
7266
7267         return (ISC_R_SUCCESS);
7268 }
7269
7270 static isc_result_t
7271 rdataset_next(dns_rdataset_t *rdataset) {
7272         unsigned int count;
7273         unsigned int length;
7274         unsigned char *raw;     /* RDATASLAB */
7275
7276         count = rdataset->privateuint4;
7277         if (count == 0)
7278                 return (ISC_R_NOMORE);
7279         count--;
7280         rdataset->privateuint4 = count;
7281
7282         /*
7283          * Skip forward one record (length + 4) or one offset (4).
7284          */
7285         raw = rdataset->private5;
7286 #if DNS_RDATASET_FIXED
7287         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7288 #endif
7289                 length = raw[0] * 256 + raw[1];
7290                 raw += length;
7291 #if DNS_RDATASET_FIXED
7292         }
7293         rdataset->private5 = raw + 4;           /* length(2) + order(2) */
7294 #else
7295         rdataset->private5 = raw + 2;           /* length(2) */
7296 #endif
7297
7298         return (ISC_R_SUCCESS);
7299 }
7300
7301 static void
7302 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7303         unsigned char *raw = rdataset->private5;        /* RDATASLAB */
7304 #if DNS_RDATASET_FIXED
7305         unsigned int offset;
7306 #endif
7307         unsigned int length;
7308         isc_region_t r;
7309         unsigned int flags = 0;
7310
7311         REQUIRE(raw != NULL);
7312
7313         /*
7314          * Find the start of the record if not already in private5
7315          * then skip the length and order fields.
7316          */
7317 #if DNS_RDATASET_FIXED
7318         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7319                 offset = (raw[0] << 24) + (raw[1] << 16) +
7320                          (raw[2] << 8) + raw[3];
7321                 raw = rdataset->private3;
7322                 raw += offset;
7323         }
7324 #endif
7325         length = raw[0] * 256 + raw[1];
7326 #if DNS_RDATASET_FIXED
7327         raw += 4;
7328 #else
7329         raw += 2;
7330 #endif
7331         if (rdataset->type == dns_rdatatype_rrsig) {
7332                 if (*raw & DNS_RDATASLAB_OFFLINE)
7333                         flags |= DNS_RDATA_OFFLINE;
7334                 length--;
7335                 raw++;
7336         }
7337         r.length = length;
7338         r.base = raw;
7339         dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7340         rdata->flags |= flags;
7341 }
7342
7343 static void
7344 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7345         dns_db_t *db = source->private1;
7346         dns_dbnode_t *node = source->private2;
7347         dns_dbnode_t *cloned_node = NULL;
7348
7349         attachnode(db, node, &cloned_node);
7350         *target = *source;
7351
7352         /*
7353          * Reset iterator state.
7354          */
7355         target->privateuint4 = 0;
7356         target->private5 = NULL;
7357 }
7358
7359 static unsigned int
7360 rdataset_count(dns_rdataset_t *rdataset) {
7361         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7362         unsigned int count;
7363
7364         count = raw[0] * 256 + raw[1];
7365
7366         return (count);
7367 }
7368
7369 static isc_result_t
7370 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7371                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7372 {
7373         dns_db_t *db = rdataset->private1;
7374         dns_dbnode_t *node = rdataset->private2;
7375         dns_dbnode_t *cloned_node;
7376         struct noqname *noqname = rdataset->private6;
7377
7378         cloned_node = NULL;
7379         attachnode(db, node, &cloned_node);
7380         nsec->methods = &rdataset_methods;
7381         nsec->rdclass = db->rdclass;
7382         nsec->type = noqname->type;
7383         nsec->covers = 0;
7384         nsec->ttl = rdataset->ttl;
7385         nsec->trust = rdataset->trust;
7386         nsec->private1 = rdataset->private1;
7387         nsec->private2 = rdataset->private2;
7388         nsec->private3 = noqname->neg;
7389         nsec->privateuint4 = 0;
7390         nsec->private5 = NULL;
7391         nsec->private6 = NULL;
7392         nsec->private7 = NULL;
7393
7394         cloned_node = NULL;
7395         attachnode(db, node, &cloned_node);
7396         nsecsig->methods = &rdataset_methods;
7397         nsecsig->rdclass = db->rdclass;
7398         nsecsig->type = dns_rdatatype_rrsig;
7399         nsecsig->covers = noqname->type;
7400         nsecsig->ttl = rdataset->ttl;
7401         nsecsig->trust = rdataset->trust;
7402         nsecsig->private1 = rdataset->private1;
7403         nsecsig->private2 = rdataset->private2;
7404         nsecsig->private3 = noqname->negsig;
7405         nsecsig->privateuint4 = 0;
7406         nsecsig->private5 = NULL;
7407         nsec->private6 = NULL;
7408         nsec->private7 = NULL;
7409
7410         dns_name_clone(&noqname->name, name);
7411
7412         return (ISC_R_SUCCESS);
7413 }
7414
7415 static isc_result_t
7416 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
7417                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7418 {
7419         dns_db_t *db = rdataset->private1;
7420         dns_dbnode_t *node = rdataset->private2;
7421         dns_dbnode_t *cloned_node;
7422         struct noqname *closest = rdataset->private7;
7423
7424         cloned_node = NULL;
7425         attachnode(db, node, &cloned_node);
7426         nsec->methods = &rdataset_methods;
7427         nsec->rdclass = db->rdclass;
7428         nsec->type = closest->type;
7429         nsec->covers = 0;
7430         nsec->ttl = rdataset->ttl;
7431         nsec->trust = rdataset->trust;
7432         nsec->private1 = rdataset->private1;
7433         nsec->private2 = rdataset->private2;
7434         nsec->private3 = closest->neg;
7435         nsec->privateuint4 = 0;
7436         nsec->private5 = NULL;
7437         nsec->private6 = NULL;
7438         nsec->private7 = NULL;
7439
7440         cloned_node = NULL;
7441         attachnode(db, node, &cloned_node);
7442         nsecsig->methods = &rdataset_methods;
7443         nsecsig->rdclass = db->rdclass;
7444         nsecsig->type = dns_rdatatype_rrsig;
7445         nsecsig->covers = closest->type;
7446         nsecsig->ttl = rdataset->ttl;
7447         nsecsig->trust = rdataset->trust;
7448         nsecsig->private1 = rdataset->private1;
7449         nsecsig->private2 = rdataset->private2;
7450         nsecsig->private3 = closest->negsig;
7451         nsecsig->privateuint4 = 0;
7452         nsecsig->private5 = NULL;
7453         nsec->private6 = NULL;
7454         nsec->private7 = NULL;
7455
7456         dns_name_clone(&closest->name, name);
7457
7458         return (ISC_R_SUCCESS);
7459 }
7460
7461 static void
7462 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
7463         dns_rbtdb_t *rbtdb = rdataset->private1;
7464         dns_rbtnode_t *rbtnode = rdataset->private2;
7465         rdatasetheader_t *header = rdataset->private3;
7466
7467         header--;
7468         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7469                   isc_rwlocktype_write);
7470         header->trust = rdataset->trust = trust;
7471         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7472                   isc_rwlocktype_write);
7473 }
7474
7475 static void
7476 rdataset_expire(dns_rdataset_t *rdataset) {
7477         dns_rbtdb_t *rbtdb = rdataset->private1;
7478         dns_rbtnode_t *rbtnode = rdataset->private2;
7479         rdatasetheader_t *header = rdataset->private3;
7480
7481         header--;
7482         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7483                   isc_rwlocktype_write);
7484         expire_header(rbtdb, header, ISC_FALSE);
7485         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7486                   isc_rwlocktype_write);
7487 }
7488
7489 /*
7490  * Rdataset Iterator Methods
7491  */
7492
7493 static void
7494 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
7495         rbtdb_rdatasetiter_t *rbtiterator;
7496
7497         rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
7498
7499         if (rbtiterator->common.version != NULL)
7500                 closeversion(rbtiterator->common.db,
7501                              &rbtiterator->common.version, ISC_FALSE);
7502         detachnode(rbtiterator->common.db, &rbtiterator->common.node);
7503         isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
7504                     sizeof(*rbtiterator));
7505
7506         *iteratorp = NULL;
7507 }
7508
7509 static isc_result_t
7510 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
7511         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7512         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7513         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7514         rbtdb_version_t *rbtversion = rbtiterator->common.version;
7515         rdatasetheader_t *header, *top_next;
7516         rbtdb_serial_t serial;
7517         isc_stdtime_t now;
7518
7519         if (IS_CACHE(rbtdb)) {
7520                 serial = 1;
7521                 now = rbtiterator->common.now;
7522         } else {
7523                 serial = rbtversion->serial;
7524                 now = 0;
7525         }
7526
7527         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7528                   isc_rwlocktype_read);
7529
7530         for (header = rbtnode->data; header != NULL; header = top_next) {
7531                 top_next = header->next;
7532                 do {
7533                         if (header->serial <= serial && !IGNORE(header)) {
7534                                 /*
7535                                  * Is this a "this rdataset doesn't exist"
7536                                  * record?  Or is it too old in the cache?
7537                                  *
7538                                  * Note: unlike everywhere else, we
7539                                  * check for now > header->rdh_ttl instead
7540                                  * of now >= header->rdh_ttl.  This allows
7541                                  * ANY and RRSIG queries for 0 TTL
7542                                  * rdatasets to work.
7543                                  */
7544                                 if (NONEXISTENT(header) ||
7545                                     (now != 0 && now > header->rdh_ttl))
7546                                         header = NULL;
7547                                 break;
7548                         } else
7549                                 header = header->down;
7550                 } while (header != NULL);
7551                 if (header != NULL)
7552                         break;
7553         }
7554
7555         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7556                     isc_rwlocktype_read);
7557
7558         rbtiterator->current = header;
7559
7560         if (header == NULL)
7561                 return (ISC_R_NOMORE);
7562
7563         return (ISC_R_SUCCESS);
7564 }
7565
7566 static isc_result_t
7567 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
7568         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7569         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7570         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7571         rbtdb_version_t *rbtversion = rbtiterator->common.version;
7572         rdatasetheader_t *header, *top_next;
7573         rbtdb_serial_t serial;
7574         isc_stdtime_t now;
7575         rbtdb_rdatatype_t type, negtype;
7576         dns_rdatatype_t rdtype, covers;
7577
7578         header = rbtiterator->current;
7579         if (header == NULL)
7580                 return (ISC_R_NOMORE);
7581
7582         if (IS_CACHE(rbtdb)) {
7583                 serial = 1;
7584                 now = rbtiterator->common.now;
7585         } else {
7586                 serial = rbtversion->serial;
7587                 now = 0;
7588         }
7589
7590         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7591                   isc_rwlocktype_read);
7592
7593         type = header->type;
7594         rdtype = RBTDB_RDATATYPE_BASE(header->type);
7595         if (rdtype == 0) {
7596                 covers = RBTDB_RDATATYPE_EXT(header->type);
7597                 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
7598         } else
7599                 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
7600         for (header = header->next; header != NULL; header = top_next) {
7601                 top_next = header->next;
7602                 /*
7603                  * If not walking back up the down list.
7604                  */
7605                 if (header->type != type && header->type != negtype) {
7606                         do {
7607                                 if (header->serial <= serial &&
7608                                     !IGNORE(header)) {
7609                                         /*
7610                                          * Is this a "this rdataset doesn't
7611                                          * exist" record?
7612                                          *
7613                                          * Note: unlike everywhere else, we
7614                                          * check for now > header->ttl instead
7615                                          * of now >= header->ttl.  This allows
7616                                          * ANY and RRSIG queries for 0 TTL
7617                                          * rdatasets to work.
7618                                          */
7619                                         if ((header->attributes &
7620                                              RDATASET_ATTR_NONEXISTENT) != 0 ||
7621                                             (now != 0 && now > header->rdh_ttl))
7622                                                 header = NULL;
7623                                         break;
7624                                 } else
7625                                         header = header->down;
7626                         } while (header != NULL);
7627                         if (header != NULL)
7628                                 break;
7629                 }
7630         }
7631
7632         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7633                     isc_rwlocktype_read);
7634
7635         rbtiterator->current = header;
7636
7637         if (header == NULL)
7638                 return (ISC_R_NOMORE);
7639
7640         return (ISC_R_SUCCESS);
7641 }
7642
7643 static void
7644 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
7645         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7646         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7647         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7648         rdatasetheader_t *header;
7649
7650         header = rbtiterator->current;
7651         REQUIRE(header != NULL);
7652
7653         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7654                   isc_rwlocktype_read);
7655
7656         bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
7657                       rdataset);
7658
7659         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7660                     isc_rwlocktype_read);
7661 }
7662
7663
7664 /*
7665  * Database Iterator Methods
7666  */
7667
7668 static inline void
7669 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7670         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7671         dns_rbtnode_t *node = rbtdbiter->node;
7672
7673         if (node == NULL)
7674                 return;
7675
7676         INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
7677         reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
7678 }
7679
7680 static inline void
7681 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7682         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7683         dns_rbtnode_t *node = rbtdbiter->node;
7684         nodelock_t *lock;
7685
7686         if (node == NULL)
7687                 return;
7688
7689         lock = &rbtdb->node_locks[node->locknum].lock;
7690         NODE_LOCK(lock, isc_rwlocktype_read);
7691         decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
7692                             rbtdbiter->tree_locked, ISC_FALSE);
7693         NODE_UNLOCK(lock, isc_rwlocktype_read);
7694
7695         rbtdbiter->node = NULL;
7696 }
7697
7698 static void
7699 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
7700         dns_rbtnode_t *node;
7701         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7702         isc_boolean_t was_read_locked = ISC_FALSE;
7703         nodelock_t *lock;
7704         int i;
7705
7706         if (rbtdbiter->delete != 0) {
7707                 /*
7708                  * Note that "%d node of %d in tree" can report things like
7709                  * "flush_deletions: 59 nodes of 41 in tree".  This means
7710                  * That some nodes appear on the deletions list more than
7711                  * once.  Only the last occurence will actually be deleted.
7712                  */
7713                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
7714                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
7715                               "flush_deletions: %d nodes of %d in tree",
7716                               rbtdbiter->delete,
7717                               dns_rbt_nodecount(rbtdb->tree));
7718
7719                 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7720                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7721                         was_read_locked = ISC_TRUE;
7722                 }
7723                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7724                 rbtdbiter->tree_locked = isc_rwlocktype_write;
7725
7726                 for (i = 0; i < rbtdbiter->delete; i++) {
7727                         node = rbtdbiter->deletions[i];
7728                         lock = &rbtdb->node_locks[node->locknum].lock;
7729
7730                         NODE_LOCK(lock, isc_rwlocktype_read);
7731                         decrement_reference(rbtdb, node, 0,
7732                                             isc_rwlocktype_read,
7733                                             rbtdbiter->tree_locked, ISC_FALSE);
7734                         NODE_UNLOCK(lock, isc_rwlocktype_read);
7735                 }
7736
7737                 rbtdbiter->delete = 0;
7738
7739                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7740                 if (was_read_locked) {
7741                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7742                         rbtdbiter->tree_locked = isc_rwlocktype_read;
7743
7744                 } else {
7745                         rbtdbiter->tree_locked = isc_rwlocktype_none;
7746                 }
7747         }
7748 }
7749
7750 static inline void
7751 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
7752         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7753
7754         REQUIRE(rbtdbiter->paused);
7755         REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
7756
7757         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7758         rbtdbiter->tree_locked = isc_rwlocktype_read;
7759
7760         rbtdbiter->paused = ISC_FALSE;
7761 }
7762
7763 static void
7764 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
7765         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
7766         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7767         dns_db_t *db = NULL;
7768
7769         if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7770                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7771                 rbtdbiter->tree_locked = isc_rwlocktype_none;
7772         } else
7773                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
7774
7775         dereference_iter_node(rbtdbiter);
7776
7777         flush_deletions(rbtdbiter);
7778
7779         dns_db_attach(rbtdbiter->common.db, &db);
7780         dns_db_detach(&rbtdbiter->common.db);
7781
7782         dns_rbtnodechain_reset(&rbtdbiter->chain);
7783         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7784         isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
7785         dns_db_detach(&db);
7786
7787         *iteratorp = NULL;
7788 }
7789
7790 static isc_result_t
7791 dbiterator_first(dns_dbiterator_t *iterator) {
7792         isc_result_t result;
7793         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7794         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7795         dns_name_t *name, *origin;
7796
7797         if (rbtdbiter->result != ISC_R_SUCCESS &&
7798             rbtdbiter->result != ISC_R_NOMORE)
7799                 return (rbtdbiter->result);
7800
7801         if (rbtdbiter->paused)
7802                 resume_iteration(rbtdbiter);
7803
7804         dereference_iter_node(rbtdbiter);
7805
7806         name = dns_fixedname_name(&rbtdbiter->name);
7807         origin = dns_fixedname_name(&rbtdbiter->origin);
7808         dns_rbtnodechain_reset(&rbtdbiter->chain);
7809         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7810
7811         if (rbtdbiter->nsec3only) {
7812                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7813                 result = dns_rbtnodechain_first(rbtdbiter->current,
7814                                                 rbtdb->nsec3, name, origin);
7815         } else {
7816                 rbtdbiter->current = &rbtdbiter->chain;
7817                 result = dns_rbtnodechain_first(rbtdbiter->current,
7818                                                 rbtdb->tree, name, origin);
7819                 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
7820                         rbtdbiter->current = &rbtdbiter->nsec3chain;
7821                         result = dns_rbtnodechain_first(rbtdbiter->current,
7822                                                         rbtdb->nsec3, name,
7823                                                         origin);
7824                 }
7825         }
7826         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7827                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7828                                                   NULL, &rbtdbiter->node);
7829                 if (result == ISC_R_SUCCESS) {
7830                         rbtdbiter->new_origin = ISC_TRUE;
7831                         reference_iter_node(rbtdbiter);
7832                 }
7833         } else {
7834                 INSIST(result == ISC_R_NOTFOUND);
7835                 result = ISC_R_NOMORE; /* The tree is empty. */
7836         }
7837
7838         rbtdbiter->result = result;
7839
7840         return (result);
7841 }
7842
7843 static isc_result_t
7844 dbiterator_last(dns_dbiterator_t *iterator) {
7845         isc_result_t result;
7846         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7847         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7848         dns_name_t *name, *origin;
7849
7850         if (rbtdbiter->result != ISC_R_SUCCESS &&
7851             rbtdbiter->result != ISC_R_NOMORE)
7852                 return (rbtdbiter->result);
7853
7854         if (rbtdbiter->paused)
7855                 resume_iteration(rbtdbiter);
7856
7857         dereference_iter_node(rbtdbiter);
7858
7859         name = dns_fixedname_name(&rbtdbiter->name);
7860         origin = dns_fixedname_name(&rbtdbiter->origin);
7861         dns_rbtnodechain_reset(&rbtdbiter->chain);
7862         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7863
7864         result = ISC_R_NOTFOUND;
7865         if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
7866                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7867                 result = dns_rbtnodechain_last(rbtdbiter->current,
7868                                                rbtdb->nsec3, name, origin);
7869         }
7870         if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
7871                 rbtdbiter->current = &rbtdbiter->chain;
7872                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7873                                                name, origin);
7874         }
7875         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7876                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7877                                                   NULL, &rbtdbiter->node);
7878                 if (result == ISC_R_SUCCESS) {
7879                         rbtdbiter->new_origin = ISC_TRUE;
7880                         reference_iter_node(rbtdbiter);
7881                 }
7882         } else {
7883                 INSIST(result == ISC_R_NOTFOUND);
7884                 result = ISC_R_NOMORE; /* The tree is empty. */
7885         }
7886
7887         rbtdbiter->result = result;
7888
7889         return (result);
7890 }
7891
7892 static isc_result_t
7893 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
7894         isc_result_t result;
7895         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7896         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7897         dns_name_t *iname, *origin;
7898
7899         if (rbtdbiter->result != ISC_R_SUCCESS &&
7900             rbtdbiter->result != ISC_R_NOTFOUND &&
7901             rbtdbiter->result != ISC_R_NOMORE)
7902                 return (rbtdbiter->result);
7903
7904         if (rbtdbiter->paused)
7905                 resume_iteration(rbtdbiter);
7906
7907         dereference_iter_node(rbtdbiter);
7908
7909         iname = dns_fixedname_name(&rbtdbiter->name);
7910         origin = dns_fixedname_name(&rbtdbiter->origin);
7911         dns_rbtnodechain_reset(&rbtdbiter->chain);
7912         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7913
7914         if (rbtdbiter->nsec3only) {
7915                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7916                 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7917                                           &rbtdbiter->node,
7918                                           rbtdbiter->current,
7919                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7920         } else if (rbtdbiter->nonsec3) {
7921                 rbtdbiter->current = &rbtdbiter->chain;
7922                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7923                                           &rbtdbiter->node,
7924                                           rbtdbiter->current,
7925                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7926         } else {
7927                 /*
7928                  * Stay on main chain if not found on either chain.
7929                  */
7930                 rbtdbiter->current = &rbtdbiter->chain;
7931                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7932                                           &rbtdbiter->node,
7933                                           rbtdbiter->current,
7934                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7935                 if (result == DNS_R_PARTIALMATCH) {
7936                         dns_rbtnode_t *node = NULL;
7937                         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7938                                                   &node, &rbtdbiter->nsec3chain,
7939                                                   DNS_RBTFIND_EMPTYDATA,
7940                                                   NULL, NULL);
7941                         if (result == ISC_R_SUCCESS) {
7942                                 rbtdbiter->node = node;
7943                                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7944                         }
7945                 }
7946         }
7947
7948 #if 1
7949         if (result == ISC_R_SUCCESS) {
7950                 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
7951                                                   origin, NULL);
7952                 if (result == ISC_R_SUCCESS) {
7953                         rbtdbiter->new_origin = ISC_TRUE;
7954                         reference_iter_node(rbtdbiter);
7955                 }
7956         } else if (result == DNS_R_PARTIALMATCH) {
7957                 result = ISC_R_NOTFOUND;
7958                 rbtdbiter->node = NULL;
7959         }
7960
7961         rbtdbiter->result = result;
7962 #else
7963         if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
7964                 isc_result_t tresult;
7965                 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
7966                                                    origin, NULL);
7967                 if (tresult == ISC_R_SUCCESS) {
7968                         rbtdbiter->new_origin = ISC_TRUE;
7969                         reference_iter_node(rbtdbiter);
7970                 } else {
7971                         result = tresult;
7972                         rbtdbiter->node = NULL;
7973                 }
7974         } else
7975                 rbtdbiter->node = NULL;
7976
7977         rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
7978                             ISC_R_SUCCESS : result;
7979 #endif
7980
7981         return (result);
7982 }
7983
7984 static isc_result_t
7985 dbiterator_prev(dns_dbiterator_t *iterator) {
7986         isc_result_t result;
7987         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7988         dns_name_t *name, *origin;
7989         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7990
7991         REQUIRE(rbtdbiter->node != NULL);
7992
7993         if (rbtdbiter->result != ISC_R_SUCCESS)
7994                 return (rbtdbiter->result);
7995
7996         if (rbtdbiter->paused)
7997                 resume_iteration(rbtdbiter);
7998
7999         name = dns_fixedname_name(&rbtdbiter->name);
8000         origin = dns_fixedname_name(&rbtdbiter->origin);
8001         result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
8002         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8003             !rbtdbiter->nonsec3 &&
8004             &rbtdbiter->nsec3chain == rbtdbiter->current) {
8005                 rbtdbiter->current = &rbtdbiter->chain;
8006                 dns_rbtnodechain_reset(rbtdbiter->current);
8007                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8008                                                name, origin);
8009                 if (result == ISC_R_NOTFOUND)
8010                         result = ISC_R_NOMORE;
8011         }
8012
8013         dereference_iter_node(rbtdbiter);
8014
8015         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8016                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8017                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8018                                                   NULL, &rbtdbiter->node);
8019         }
8020
8021         if (result == ISC_R_SUCCESS)
8022                 reference_iter_node(rbtdbiter);
8023
8024         rbtdbiter->result = result;
8025
8026         return (result);
8027 }
8028
8029 static isc_result_t
8030 dbiterator_next(dns_dbiterator_t *iterator) {
8031         isc_result_t result;
8032         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8033         dns_name_t *name, *origin;
8034         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8035
8036         REQUIRE(rbtdbiter->node != NULL);
8037
8038         if (rbtdbiter->result != ISC_R_SUCCESS)
8039                 return (rbtdbiter->result);
8040
8041         if (rbtdbiter->paused)
8042                 resume_iteration(rbtdbiter);
8043
8044         name = dns_fixedname_name(&rbtdbiter->name);
8045         origin = dns_fixedname_name(&rbtdbiter->origin);
8046         result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
8047         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8048             !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
8049                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8050                 dns_rbtnodechain_reset(rbtdbiter->current);
8051                 result = dns_rbtnodechain_first(rbtdbiter->current,
8052                                                 rbtdb->nsec3, name, origin);
8053                 if (result == ISC_R_NOTFOUND)
8054                         result = ISC_R_NOMORE;
8055         }
8056
8057         dereference_iter_node(rbtdbiter);
8058
8059         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8060                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8061                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8062                                                   NULL, &rbtdbiter->node);
8063         }
8064         if (result == ISC_R_SUCCESS)
8065                 reference_iter_node(rbtdbiter);
8066
8067         rbtdbiter->result = result;
8068
8069         return (result);
8070 }
8071
8072 static isc_result_t
8073 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
8074                    dns_name_t *name)
8075 {
8076         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8077         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8078         dns_rbtnode_t *node = rbtdbiter->node;
8079         isc_result_t result;
8080         dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8081         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8082
8083         REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8084         REQUIRE(rbtdbiter->node != NULL);
8085
8086         if (rbtdbiter->paused)
8087                 resume_iteration(rbtdbiter);
8088
8089         if (name != NULL) {
8090                 if (rbtdbiter->common.relative_names)
8091                         origin = NULL;
8092                 result = dns_name_concatenate(nodename, origin, name, NULL);
8093                 if (result != ISC_R_SUCCESS)
8094                         return (result);
8095                 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8096                         result = DNS_R_NEWORIGIN;
8097         } else
8098                 result = ISC_R_SUCCESS;
8099
8100         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8101         new_reference(rbtdb, node);
8102         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8103
8104         *nodep = rbtdbiter->node;
8105
8106         if (iterator->cleaning && result == ISC_R_SUCCESS) {
8107                 isc_result_t expire_result;
8108
8109                 /*
8110                  * If the deletion array is full, flush it before trying
8111                  * to expire the current node.  The current node can't
8112                  * fully deleted while the iteration cursor is still on it.
8113                  */
8114                 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8115                         flush_deletions(rbtdbiter);
8116
8117                 expire_result = expirenode(iterator->db, *nodep, 0);
8118
8119                 /*
8120                  * expirenode() currently always returns success.
8121                  */
8122                 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8123                         unsigned int refs;
8124
8125                         rbtdbiter->deletions[rbtdbiter->delete++] = node;
8126                         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8127                         dns_rbtnode_refincrement(node, &refs);
8128                         INSIST(refs != 0);
8129                         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8130                 }
8131         }
8132
8133         return (result);
8134 }
8135
8136 static isc_result_t
8137 dbiterator_pause(dns_dbiterator_t *iterator) {
8138         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8139         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8140
8141         if (rbtdbiter->result != ISC_R_SUCCESS &&
8142             rbtdbiter->result != ISC_R_NOMORE)
8143                 return (rbtdbiter->result);
8144
8145         if (rbtdbiter->paused)
8146                 return (ISC_R_SUCCESS);
8147
8148         rbtdbiter->paused = ISC_TRUE;
8149
8150         if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8151                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8152                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8153                 rbtdbiter->tree_locked = isc_rwlocktype_none;
8154         }
8155
8156         flush_deletions(rbtdbiter);
8157
8158         return (ISC_R_SUCCESS);
8159 }
8160
8161 static isc_result_t
8162 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8163         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8164         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8165
8166         if (rbtdbiter->result != ISC_R_SUCCESS)
8167                 return (rbtdbiter->result);
8168
8169         return (dns_name_copy(origin, name, NULL));
8170 }
8171
8172 /*%
8173  * Additional cache routines.
8174  */
8175 static isc_result_t
8176 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8177                        dns_rdatatype_t qtype, dns_acache_t *acache,
8178                        dns_zone_t **zonep, dns_db_t **dbp,
8179                        dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8180                        dns_name_t *fname, dns_message_t *msg,
8181                        isc_stdtime_t now)
8182 {
8183         dns_rbtdb_t *rbtdb = rdataset->private1;
8184         dns_rbtnode_t *rbtnode = rdataset->private2;
8185         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8186         unsigned int current_count = rdataset->privateuint4;
8187         unsigned int count;
8188         rdatasetheader_t *header;
8189         nodelock_t *nodelock;
8190         unsigned int total_count;
8191         acachectl_t *acarray;
8192         dns_acacheentry_t *entry;
8193         isc_result_t result;
8194
8195         UNUSED(qtype); /* we do not use this value at least for now */
8196         UNUSED(acache);
8197
8198         header = (struct rdatasetheader *)(raw - sizeof(*header));
8199
8200         total_count = raw[0] * 256 + raw[1];
8201         INSIST(total_count > current_count);
8202         count = total_count - current_count - 1;
8203
8204         acarray = NULL;
8205
8206         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8207         NODE_LOCK(nodelock, isc_rwlocktype_read);
8208
8209         switch (type) {
8210         case dns_rdatasetadditional_fromauth:
8211                 acarray = header->additional_auth;
8212                 break;
8213         case dns_rdatasetadditional_fromcache:
8214                 acarray = NULL;
8215                 break;
8216         case dns_rdatasetadditional_fromglue:
8217                 acarray = header->additional_glue;
8218                 break;
8219         default:
8220                 INSIST(0);
8221         }
8222
8223         if (acarray == NULL) {
8224                 if (type != dns_rdatasetadditional_fromcache)
8225                         dns_acache_countquerymiss(acache);
8226                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8227                 return (ISC_R_NOTFOUND);
8228         }
8229
8230         if (acarray[count].entry == NULL) {
8231                 dns_acache_countquerymiss(acache);
8232                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8233                 return (ISC_R_NOTFOUND);
8234         }
8235
8236         entry = NULL;
8237         dns_acache_attachentry(acarray[count].entry, &entry);
8238
8239         NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8240
8241         result = dns_acache_getentry(entry, zonep, dbp, versionp,
8242                                      nodep, fname, msg, now);
8243
8244         dns_acache_detachentry(&entry);
8245
8246         return (result);
8247 }
8248
8249 static void
8250 acache_callback(dns_acacheentry_t *entry, void **arg) {
8251         dns_rbtdb_t *rbtdb;
8252         dns_rbtnode_t *rbtnode;
8253         nodelock_t *nodelock;
8254         acachectl_t *acarray = NULL;
8255         acache_cbarg_t *cbarg;
8256         unsigned int count;
8257
8258         REQUIRE(arg != NULL);
8259         cbarg = *arg;
8260
8261         /*
8262          * The caller must hold the entry lock.
8263          */
8264
8265         rbtdb = (dns_rbtdb_t *)cbarg->db;
8266         rbtnode = (dns_rbtnode_t *)cbarg->node;
8267
8268         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8269         NODE_LOCK(nodelock, isc_rwlocktype_write);
8270
8271         switch (cbarg->type) {
8272         case dns_rdatasetadditional_fromauth:
8273                 acarray = cbarg->header->additional_auth;
8274                 break;
8275         case dns_rdatasetadditional_fromglue:
8276                 acarray = cbarg->header->additional_glue;
8277                 break;
8278         default:
8279                 INSIST(0);
8280         }
8281
8282         count = cbarg->count;
8283         if (acarray != NULL && acarray[count].entry == entry) {
8284                 acarray[count].entry = NULL;
8285                 INSIST(acarray[count].cbarg == cbarg);
8286                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8287                 acarray[count].cbarg = NULL;
8288         } else
8289                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8290
8291         dns_acache_detachentry(&entry);
8292
8293         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8294
8295         dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8296         dns_db_detach((dns_db_t **)(void*)&rbtdb);
8297
8298         *arg = NULL;
8299 }
8300
8301 static void
8302 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
8303                       acache_cbarg_t **cbargp)
8304 {
8305         acache_cbarg_t *cbarg;
8306
8307         REQUIRE(mctx != NULL);
8308         REQUIRE(entry != NULL);
8309         REQUIRE(cbargp != NULL && *cbargp != NULL);
8310
8311         cbarg = *cbargp;
8312
8313         dns_acache_cancelentry(entry);
8314         dns_db_detachnode(cbarg->db, &cbarg->node);
8315         dns_db_detach(&cbarg->db);
8316
8317         isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
8318
8319         *cbargp = NULL;
8320 }
8321
8322 static isc_result_t
8323 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8324                        dns_rdatatype_t qtype, dns_acache_t *acache,
8325                        dns_zone_t *zone, dns_db_t *db,
8326                        dns_dbversion_t *version, dns_dbnode_t *node,
8327                        dns_name_t *fname)
8328 {
8329         dns_rbtdb_t *rbtdb = rdataset->private1;
8330         dns_rbtnode_t *rbtnode = rdataset->private2;
8331         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8332         unsigned int current_count = rdataset->privateuint4;
8333         rdatasetheader_t *header;
8334         unsigned int total_count, count;
8335         nodelock_t *nodelock;
8336         isc_result_t result;
8337         acachectl_t *acarray;
8338         dns_acacheentry_t *newentry, *oldentry = NULL;
8339         acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8340
8341         UNUSED(qtype);
8342
8343         if (type == dns_rdatasetadditional_fromcache)
8344                 return (ISC_R_SUCCESS);
8345
8346         header = (struct rdatasetheader *)(raw - sizeof(*header));
8347
8348         total_count = raw[0] * 256 + raw[1];
8349         INSIST(total_count > current_count);
8350         count = total_count - current_count - 1; /* should be private data */
8351
8352         newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8353         if (newcbarg == NULL)
8354                 return (ISC_R_NOMEMORY);
8355         newcbarg->type = type;
8356         newcbarg->count = count;
8357         newcbarg->header = header;
8358         newcbarg->db = NULL;
8359         dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8360         newcbarg->node = NULL;
8361         dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8362                           &newcbarg->node);
8363         newentry = NULL;
8364         result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8365                                         acache_callback, newcbarg, &newentry);
8366         if (result != ISC_R_SUCCESS)
8367                 goto fail;
8368         /* Set cache data in the new entry. */
8369         result = dns_acache_setentry(acache, newentry, zone, db,
8370                                      version, node, fname);
8371         if (result != ISC_R_SUCCESS)
8372                 goto fail;
8373
8374         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8375         NODE_LOCK(nodelock, isc_rwlocktype_write);
8376
8377         acarray = NULL;
8378         switch (type) {
8379         case dns_rdatasetadditional_fromauth:
8380                 acarray = header->additional_auth;
8381                 break;
8382         case dns_rdatasetadditional_fromglue:
8383                 acarray = header->additional_glue;
8384                 break;
8385         default:
8386                 INSIST(0);
8387         }
8388
8389         if (acarray == NULL) {
8390                 unsigned int i;
8391
8392                 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
8393                                       sizeof(acachectl_t));
8394
8395                 if (acarray == NULL) {
8396                         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8397                         goto fail;
8398                 }
8399
8400                 for (i = 0; i < total_count; i++) {
8401                         acarray[i].entry = NULL;
8402                         acarray[i].cbarg = NULL;
8403                 }
8404         }
8405         switch (type) {
8406         case dns_rdatasetadditional_fromauth:
8407                 header->additional_auth = acarray;
8408                 break;
8409         case dns_rdatasetadditional_fromglue:
8410                 header->additional_glue = acarray;
8411                 break;
8412         default:
8413                 INSIST(0);
8414         }
8415
8416         if (acarray[count].entry != NULL) {
8417                 /*
8418                  * Swap the entry.  Delay cleaning-up the old entry since
8419                  * it would require a node lock.
8420                  */
8421                 oldentry = acarray[count].entry;
8422                 INSIST(acarray[count].cbarg != NULL);
8423                 oldcbarg = acarray[count].cbarg;
8424         }
8425         acarray[count].entry = newentry;
8426         acarray[count].cbarg = newcbarg;
8427
8428         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8429
8430         if (oldentry != NULL) {
8431                 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
8432                 dns_acache_detachentry(&oldentry);
8433         }
8434
8435         return (ISC_R_SUCCESS);
8436
8437  fail:
8438         if (newcbarg != NULL) {
8439                 if (newentry != NULL) {
8440                         acache_cancelentry(rbtdb->common.mctx, newentry,
8441                                            &newcbarg);
8442                         dns_acache_detachentry(&newentry);
8443                 } else {
8444                         dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
8445                         dns_db_detach(&newcbarg->db);
8446                         isc_mem_put(rbtdb->common.mctx, newcbarg,
8447                             sizeof(*newcbarg));
8448                 }
8449         }
8450
8451         return (result);
8452 }
8453
8454 static isc_result_t
8455 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
8456                        dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
8457 {
8458         dns_rbtdb_t *rbtdb = rdataset->private1;
8459         dns_rbtnode_t *rbtnode = rdataset->private2;
8460         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8461         unsigned int current_count = rdataset->privateuint4;
8462         rdatasetheader_t *header;
8463         nodelock_t *nodelock;
8464         unsigned int total_count, count;
8465         acachectl_t *acarray;
8466         dns_acacheentry_t *entry;
8467         acache_cbarg_t *cbarg;
8468
8469         UNUSED(qtype);          /* we do not use this value at least for now */
8470         UNUSED(acache);
8471
8472         if (type == dns_rdatasetadditional_fromcache)
8473                 return (ISC_R_SUCCESS);
8474
8475         header = (struct rdatasetheader *)(raw - sizeof(*header));
8476
8477         total_count = raw[0] * 256 + raw[1];
8478         INSIST(total_count > current_count);
8479         count = total_count - current_count - 1;
8480
8481         acarray = NULL;
8482         entry = NULL;
8483
8484         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8485         NODE_LOCK(nodelock, isc_rwlocktype_write);
8486
8487         switch (type) {
8488         case dns_rdatasetadditional_fromauth:
8489                 acarray = header->additional_auth;
8490                 break;
8491         case dns_rdatasetadditional_fromglue:
8492                 acarray = header->additional_glue;
8493                 break;
8494         default:
8495                 INSIST(0);
8496         }
8497
8498         if (acarray == NULL) {
8499                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8500                 return (ISC_R_NOTFOUND);
8501         }
8502
8503         entry = acarray[count].entry;
8504         if (entry == NULL) {
8505                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8506                 return (ISC_R_NOTFOUND);
8507         }
8508
8509         acarray[count].entry = NULL;
8510         cbarg = acarray[count].cbarg;
8511         acarray[count].cbarg = NULL;
8512
8513         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8514
8515         if (entry != NULL) {
8516                 if (cbarg != NULL)
8517                         acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
8518                 dns_acache_detachentry(&entry);
8519         }
8520
8521         return (ISC_R_SUCCESS);
8522 }
8523
8524 /*%
8525  * Routines for LRU-based cache management.
8526  */
8527
8528 /*%
8529  * See if a given cache entry that is being reused needs to be updated
8530  * in the LRU-list.  From the LRU management point of view, this function is
8531  * expected to return true for almost all cases.  When used with threads,
8532  * however, this may cause a non-negligible performance penalty because a
8533  * writer lock will have to be acquired before updating the list.
8534  * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
8535  * function returns true if the entry has not been updated for some period of
8536  * time.  We differentiate the NS or glue address case and the others since
8537  * experiments have shown that the former tends to be accessed relatively
8538  * infrequently and the cost of cache miss is higher (e.g., a missing NS records
8539  * may cause external queries at a higher level zone, involving more
8540  * transactions).
8541  *
8542  * Caller must hold the node (read or write) lock.
8543  */
8544 static inline isc_boolean_t
8545 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
8546         if ((header->attributes &
8547              (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
8548                 return (ISC_FALSE);
8549
8550 #if DNS_RBTDB_LIMITLRUUPDATE
8551         if (header->type == dns_rdatatype_ns ||
8552             (header->trust == dns_trust_glue &&
8553              (header->type == dns_rdatatype_a ||
8554               header->type == dns_rdatatype_aaaa))) {
8555                 /*
8556                  * Glue records are updated if at least 60 seconds have passed
8557                  * since the previous update time.
8558                  */
8559                 return (header->last_used + 60 <= now);
8560         }
8561
8562         /* Other records are updated if 5 minutes have passed. */
8563         return (header->last_used + 300 <= now);
8564 #else
8565         UNUSED(now);
8566
8567         return (ISC_TRUE);
8568 #endif
8569 }
8570
8571 /*%
8572  * Update the timestamp of a given cache entry and move it to the head
8573  * of the corresponding LRU list.
8574  *
8575  * Caller must hold the node (write) lock.
8576  *
8577  * Note that the we do NOT touch the heap here, as the TTL has not changed.
8578  */
8579 static void
8580 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8581               isc_stdtime_t now)
8582 {
8583         INSIST(IS_CACHE(rbtdb));
8584
8585         /* To be checked: can we really assume this? XXXMLG */
8586         INSIST(ISC_LINK_LINKED(header, link));
8587
8588         ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
8589         header->last_used = now;
8590         ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
8591 }
8592
8593 /*%
8594  * Purge some expired and/or stale (i.e. unused for some period) cache entries
8595  * under an overmem condition.  To recover from this condition quickly, up to
8596  * 2 entries will be purged.  This process is triggered while adding a new
8597  * entry, and we specifically avoid purging entries in the same LRU bucket as
8598  * the one to which the new entry will belong.  Otherwise, we might purge
8599  * entries of the same name of different RR types while adding RRsets from a
8600  * single response (consider the case where we're adding A and AAAA glue records
8601  * of the same NS name).
8602  */
8603 static void
8604 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
8605               isc_stdtime_t now, isc_boolean_t tree_locked)
8606 {
8607         rdatasetheader_t *header, *header_prev;
8608         unsigned int locknum;
8609         int purgecount = 2;
8610
8611         for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
8612              locknum != locknum_start && purgecount > 0;
8613              locknum = (locknum + 1) % rbtdb->node_lock_count) {
8614                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
8615                           isc_rwlocktype_write);
8616
8617                 header = isc_heap_element(rbtdb->heaps[locknum], 1);
8618                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
8619                         expire_header(rbtdb, header, tree_locked);
8620                         purgecount--;
8621                 }
8622
8623                 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
8624                      header != NULL && purgecount > 0;
8625                      header = header_prev) {
8626                         header_prev = ISC_LIST_PREV(header, link);
8627                         /*
8628                          * Unlink the entry at this point to avoid checking it
8629                          * again even if it's currently used someone else and
8630                          * cannot be purged at this moment.  This entry won't be
8631                          * referenced any more (so unlinking is safe) since the
8632                          * TTL was reset to 0.
8633                          */
8634                         ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
8635                                         link);
8636                         expire_header(rbtdb, header, tree_locked);
8637                         purgecount--;
8638                 }
8639
8640                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
8641                                     isc_rwlocktype_write);
8642         }
8643 }
8644
8645 static void
8646 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8647               isc_boolean_t tree_locked)
8648 {
8649         set_ttl(rbtdb, header, 0);
8650         header->attributes |= RDATASET_ATTR_STALE;
8651         header->node->dirty = 1;
8652
8653         /*
8654          * Caller must hold the node (write) lock.
8655          */
8656
8657         if (dns_rbtnode_refcurrent(header->node) == 0) {
8658                 /*
8659                  * If no one else is using the node, we can clean it up now.
8660                  * We first need to gain a new reference to the node to meet a
8661                  * requirement of decrement_reference().
8662                  */
8663                 new_reference(rbtdb, header->node);
8664                 decrement_reference(rbtdb, header->node, 0,
8665                                     isc_rwlocktype_write,
8666                                     tree_locked ? isc_rwlocktype_write :
8667                                     isc_rwlocktype_none, ISC_FALSE);
8668         }
8669 }