]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/bind9/lib/dns/rbtdb.c
Merge ACPICA 20110211.
[FreeBSD/FreeBSD.git] / contrib / bind9 / lib / dns / rbtdb.c
1 /*
2  * Copyright (C) 2004-2010  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id: rbtdb.c,v 1.270.12.26 2010-12-02 05:09:58 marka Exp $ */
19
20 /*! \file */
21
22 /*
23  * Principal Author: Bob Halley
24  */
25
26 #include <config.h>
27
28 /* #define inline */
29
30 #include <isc/event.h>
31 #include <isc/heap.h>
32 #include <isc/mem.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
41 #include <isc/task.h>
42 #include <isc/time.h>
43 #include <isc/util.h>
44
45 #include <dns/acache.h>
46 #include <dns/db.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
50 #include <dns/lib.h>
51 #include <dns/log.h>
52 #include <dns/masterdump.h>
53 #include <dns/nsec.h>
54 #include <dns/nsec3.h>
55 #include <dns/rbt.h>
56 #include <dns/rdata.h>
57 #include <dns/rdataset.h>
58 #include <dns/rdatasetiter.h>
59 #include <dns/rdataslab.h>
60 #include <dns/rdatastruct.h>
61 #include <dns/result.h>
62 #include <dns/stats.h>
63 #include <dns/view.h>
64 #include <dns/zone.h>
65 #include <dns/zonekey.h>
66
67 #ifdef DNS_RBTDB_VERSION64
68 #include "rbtdb64.h"
69 #else
70 #include "rbtdb.h"
71 #endif
72
73 #ifdef DNS_RBTDB_VERSION64
74 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '8')
75 #else
76 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '4')
77 #endif
78
79 /*%
80  * Note that "impmagic" is not the first four bytes of the struct, so
81  * ISC_MAGIC_VALID cannot be used.
82  */
83 #define VALID_RBTDB(rbtdb)      ((rbtdb) != NULL && \
84                                  (rbtdb)->common.impmagic == RBTDB_MAGIC)
85
86 #ifdef DNS_RBTDB_VERSION64
87 typedef isc_uint64_t                    rbtdb_serial_t;
88 /*%
89  * Make casting easier in symbolic debuggers by using different names
90  * for the 64 bit version.
91  */
92 #define dns_rbtdb_t dns_rbtdb64_t
93 #define rdatasetheader_t rdatasetheader64_t
94 #define rbtdb_version_t rbtdb_version64_t
95 #else
96 typedef isc_uint32_t                    rbtdb_serial_t;
97 #endif
98
99 typedef isc_uint32_t                    rbtdb_rdatatype_t;
100
/*
 * An rbtdb_rdatatype_t packs two 16-bit rdata types into one value: the
 * "base" type in the low 16 bits and the "extension" type in the high 16
 * bits.  RBTDB_RDATATYPE_BASE() and RBTDB_RDATATYPE_EXT() extract the two
 * halves; RBTDB_RDATATYPE_VALUE(b, e) builds the packed value.
 *
 * RBTDB_RDATATYPE_VALUE() converts its operands to rbtdb_rdatatype_t
 * *before* shifting: a 16-bit operand is otherwise promoted to (signed)
 * int, and shifting a value >= 0x8000 left by 16 bits would overflow it,
 * which is undefined behaviour.
 */
#define RBTDB_RDATATYPE_BASE(type)      ((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)       ((dns_rdatatype_t)((type) >> 16))
#define RBTDB_RDATATYPE_VALUE(b, e) \
        (((rbtdb_rdatatype_t)(e) << 16) | (rbtdb_rdatatype_t)(b))
104
105 #define RBTDB_RDATATYPE_SIGNSEC \
106                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
107 #define RBTDB_RDATATYPE_SIGNSEC3 \
108                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
109 #define RBTDB_RDATATYPE_SIGNS \
110                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
111 #define RBTDB_RDATATYPE_SIGCNAME \
112                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
113 #define RBTDB_RDATATYPE_SIGDNAME \
114                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
115 #define RBTDB_RDATATYPE_NCACHEANY \
116                 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
117
118 /*
119  * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
120  * Using rwlock is effective with regard to lookup performance only when
121  * it is implemented in an efficient way.
122  * Otherwise, it is generally wise to stick to the simple locking since rwlock
123  * would require more memory or can even make lookups slower due to its own
124  * overhead (when it internally calls mutex locks).
125  */
126 #ifdef ISC_RWLOCK_USEATOMIC
127 #define DNS_RBTDB_USERWLOCK 1
128 #else
129 #define DNS_RBTDB_USERWLOCK 0
130 #endif
131
132 #if DNS_RBTDB_USERWLOCK
133 #define RBTDB_INITLOCK(l)       isc_rwlock_init((l), 0, 0)
134 #define RBTDB_DESTROYLOCK(l)    isc_rwlock_destroy(l)
135 #define RBTDB_LOCK(l, t)        RWLOCK((l), (t))
136 #define RBTDB_UNLOCK(l, t)      RWUNLOCK((l), (t))
137 #else
138 #define RBTDB_INITLOCK(l)       isc_mutex_init(l)
139 #define RBTDB_DESTROYLOCK(l)    DESTROYLOCK(l)
140 #define RBTDB_LOCK(l, t)        LOCK(l)
141 #define RBTDB_UNLOCK(l, t)      UNLOCK(l)
142 #endif
143
144 /*
145  * Since node locking is sensitive to both performance and memory footprint,
146  * we need some trick here.  If we have both high-performance rwlock and
147  * high performance and small-memory reference counters, we use rwlock for
148  * node lock and isc_refcount for node references.  In this case, we don't have
149  * to protect the access to the counters by locks.
150  * Otherwise, we simply use ordinary mutex lock for node locking, and use
151  * simple integers as reference counters which is protected by the lock.
152  * In most cases, we can simply use wrapper macros such as NODE_LOCK and
153  * NODE_UNLOCK.  In some other cases, however, we need to protect reference
154  * counters first and then protect other parts of a node as read-only data.
155  * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
156  * provided for these special cases.  When we can use the efficient backend
157  * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
158  * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
159  * section including the access to the reference counter.
160  * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
161  * section is also protected by NODE_STRONGLOCK().
162  */
163 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
164 typedef isc_rwlock_t nodelock_t;
165
166 #define NODE_INITLOCK(l)        isc_rwlock_init((l), 0, 0)
167 #define NODE_DESTROYLOCK(l)     isc_rwlock_destroy(l)
168 #define NODE_LOCK(l, t)         RWLOCK((l), (t))
169 #define NODE_UNLOCK(l, t)       RWUNLOCK((l), (t))
170 #define NODE_TRYUPGRADE(l)      isc_rwlock_tryupgrade(l)
171
172 #define NODE_STRONGLOCK(l)      ((void)0)
173 #define NODE_STRONGUNLOCK(l)    ((void)0)
174 #define NODE_WEAKLOCK(l, t)     NODE_LOCK(l, t)
175 #define NODE_WEAKUNLOCK(l, t)   NODE_UNLOCK(l, t)
176 #define NODE_WEAKDOWNGRADE(l)   isc_rwlock_downgrade(l)
177 #else
178 typedef isc_mutex_t nodelock_t;
179
180 #define NODE_INITLOCK(l)        isc_mutex_init(l)
181 #define NODE_DESTROYLOCK(l)     DESTROYLOCK(l)
182 #define NODE_LOCK(l, t)         LOCK(l)
183 #define NODE_UNLOCK(l, t)       UNLOCK(l)
184 #define NODE_TRYUPGRADE(l)      ISC_R_SUCCESS
185
186 #define NODE_STRONGLOCK(l)      LOCK(l)
187 #define NODE_STRONGUNLOCK(l)    UNLOCK(l)
188 #define NODE_WEAKLOCK(l, t)     ((void)0)
189 #define NODE_WEAKUNLOCK(l, t)   ((void)0)
190 #define NODE_WEAKDOWNGRADE(l)   ((void)0)
191 #endif
192
193 /*%
194  * Whether to rate-limit updating the LRU to avoid possible thread contention.
195  * Our performance measurement has shown the cost is marginal, so it's defined
196  * to be 0 by default either with or without threads.
197  */
198 #ifndef DNS_RBTDB_LIMITLRUUPDATE
199 #define DNS_RBTDB_LIMITLRUUPDATE 0
200 #endif
201
202 /*
203  * Allow clients with a virtual time of up to 5 minutes in the past to see
204  * records that would otherwise have expired.
205  */
206 #define RBTDB_VIRTUAL 300
207
/*
 * Record referenced by the 'noqname' and 'closest' members of
 * rdatasetheader_t: a name, two untyped data pointers ('neg' and 'negsig')
 * and an rdata type.
 */
struct noqname {
        dns_name_t      name;
        void *          neg;
        void *          negsig;
        dns_rdatatype_t type;
};
214
215 typedef struct acachectl acachectl_t;
216
/*
 * Per-rdataset header.  Headers for the different types are chained through
 * 'next'; older versions of the same rdataset hang off 'down'.
 */
typedef struct rdatasetheader {
        /*%
         * Locked by the owning node's lock.
         */
        rbtdb_serial_t                  serial;
        /* Written by set_ttl(); compared by ttl_sooner(). */
        dns_ttl_t                       rdh_ttl;
        /* Packed base/extension type; see the RBTDB_RDATATYPE_* macros. */
        rbtdb_rdatatype_t               type;
        /* RDATASET_ATTR_* bit flags. */
        isc_uint16_t                    attributes;
        dns_trust_t                     trust;
        struct noqname                  *noqname;
        struct noqname                  *closest;
        /*
         * The 'next' and 'down' links below are plain pointers: we don't
         * use the LIST macros, because the LIST structure has both head
         * and tail pointers, and is doubly linked.
         */

        struct rdatasetheader           *next;
        /*%<
         * If this is the top header for an rdataset, 'next' points
         * to the top header for the next rdataset (i.e., the next type).
         * Otherwise, it points up to the header whose down pointer points
         * at this header.
         */

        struct rdatasetheader           *down;
        /*%<
         * Points to the header for the next older version of
         * this rdataset.
         */

        isc_uint32_t                    count;
        /*%<
         * Monotonically increased every time this rdataset is bound so that
         * it is used as the base of the starting point in DNS responses
         * when the "cyclic" rrset-order is required.  Since the ordering
         * should not be so crucial, no lock is set for the counter for
         * performance reasons.
         */

        acachectl_t                     *additional_auth;
        acachectl_t                     *additional_glue;

        dns_rbtnode_t                   *node;
        isc_stdtime_t                   last_used;
        ISC_LINK(struct rdatasetheader) link;

        unsigned int                    heap_index;
        /*%<
         * Used for TTL-based cache cleaning.  Stored by set_index();
         * set_ttl() does not touch the heap when this is 0.
         */
        isc_stdtime_t                   resign;
        /*%<
         * Compared by resign_sooner().
         */
} rdatasetheader_t;
269
270 typedef ISC_LIST(rdatasetheader_t)      rdatasetheaderlist_t;
271 typedef ISC_LIST(dns_rbtnode_t)         rbtnodelist_t;
272
/*
 * Bit flags kept in the 'attributes' member of rdatasetheader_t.  Most of
 * them have a matching predicate macro (EXISTS(), NONEXISTENT(), IGNORE(),
 * RETAIN(), NXDOMAIN(), RESIGN(), OPTOUT()).
 */
#define RDATASET_ATTR_NONEXISTENT       0x0001
#define RDATASET_ATTR_STALE             0x0002
#define RDATASET_ATTR_IGNORE            0x0004
#define RDATASET_ATTR_RETAIN            0x0008
#define RDATASET_ATTR_NXDOMAIN          0x0010
#define RDATASET_ATTR_RESIGN            0x0020
#define RDATASET_ATTR_STATCOUNT         0x0040
#define RDATASET_ATTR_OPTOUT            0x0080
281
/*
 * Bundles an additional-data type, a count, and the db/node/header the
 * entry refers to.
 * NOTE(review): judging by the name this is handed to the acache as a
 * callback argument -- confirm against its users later in the file.
 */
typedef struct acache_cbarg {
        dns_rdatasetadditional_t        type;
        unsigned int                    count;
        dns_db_t                        *db;
        dns_dbnode_t                    *node;
        rdatasetheader_t                *header;
} acache_cbarg_t;

/*
 * Pairs an acache entry with its acache_cbarg_t.  rdatasetheader_t refers
 * to these through its 'additional_auth' and 'additional_glue' members.
 */
struct acachectl {
        dns_acacheentry_t               *entry;
        acache_cbarg_t                  *cbarg;
};
294
295 /*
296  * XXX
297  * When the cache will pre-expire data (due to memory low or other
298  * situations) before the rdataset's TTL has expired, it MUST
299  * respect the RETAIN bit and not expire the data until its TTL is
300  * expired.
301  */
302
303 #undef IGNORE                   /* WIN32 winbase.h defines this. */
304
/*
 * Predicates over the RDATASET_ATTR_* bits of a header's 'attributes'
 * member.  Each one evaluates to 1 when the condition holds and to 0
 * otherwise; EXISTS() is the negation of NONEXISTENT().
 */
#define RDATASET_ATTR_ISSET(header, attr) \
        (((header)->attributes & (attr)) != 0)

#define EXISTS(header) \
        (!RDATASET_ATTR_ISSET(header, RDATASET_ATTR_NONEXISTENT))
#define NONEXISTENT(header) \
        RDATASET_ATTR_ISSET(header, RDATASET_ATTR_NONEXISTENT)
#define IGNORE(header) \
        RDATASET_ATTR_ISSET(header, RDATASET_ATTR_IGNORE)
#define RETAIN(header) \
        RDATASET_ATTR_ISSET(header, RDATASET_ATTR_RETAIN)
#define NXDOMAIN(header) \
        RDATASET_ATTR_ISSET(header, RDATASET_ATTR_NXDOMAIN)
#define RESIGN(header) \
        RDATASET_ATTR_ISSET(header, RDATASET_ATTR_RESIGN)
#define OPTOUT(header) \
        RDATASET_ATTR_ISSET(header, RDATASET_ATTR_OPTOUT)
319
320 #define DEFAULT_NODE_LOCK_COUNT         7       /*%< Should be prime. */
321
322 /*%
323  * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
324  * There is a tradeoff issue about configuring this value: if this is too
325  * small, it may cause heavier contention between threads; if this is too large,
326  * LRU purge algorithm won't work well (entries tend to be purged prematurely).
327  * The default value should work well for most environments, but this can
328  * also be configurable at compilation time via the
329  * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable.  This value must be larger than
330  * 1 due to the assumption of overmem_purge().
331  */
332 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
333 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
334 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
335 #else
336 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
337 #endif
338 #else
339 #define DEFAULT_CACHE_NODE_LOCK_COUNT   16
340 #endif  /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
341
/*
 * One node-lock bucket: the lock itself, a reference counter and an
 * 'exiting' flag.  dns_rbtdb_t refers to these through its 'node_locks'
 * member.
 */
typedef struct {
        nodelock_t                      lock;
        /* Protected in the refcount routines. */
        isc_refcount_t                  references;
        /* Locked by lock. */
        isc_boolean_t                   exiting;
} rbtdb_nodelock_t;
349
/*
 * Entry on a version's 'changed_list' (see rbtdb_version_t): a node
 * together with a 'dirty' flag.
 */
typedef struct rbtdb_changed {
        dns_rbtnode_t *                 node;
        isc_boolean_t                   dirty;
        ISC_LINK(struct rbtdb_changed)  link;
} rbtdb_changed_t;

typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
357
/*
 * Value of the 'secure' member of rbtdb_version_t.
 * NOTE(review): presumably the DNSSEC signing state of that version
 * (unsigned / partially signed / fully signed) -- confirm against the
 * code that sets it.
 */
typedef enum {
        dns_db_insecure,
        dns_db_partial,
        dns_db_secure
} dns_db_secure_t;
363
/*
 * One version of the database contents, identified by 'serial'.  Versions
 * are linked on dns_rbtdb_t.open_versions through 'link'; the database also
 * points at its 'current_version' and 'future_version'.
 */
typedef struct rbtdb_version {
        /* Not locked */
        rbtdb_serial_t                  serial;
        /*
         * Protected in the refcount routines.
         * XXXJT: should we change the lock policy based on the refcount
         * performance?
         */
        isc_refcount_t                  references;
        /* Locked by database lock. */
        isc_boolean_t                   writer;
        isc_boolean_t                   commit_ok;
        rbtdb_changedlist_t             changed_list;
        rdatasetheaderlist_t            resigned_list;
        ISC_LINK(struct rbtdb_version)  link;
        dns_db_secure_t                 secure;
        isc_boolean_t                   havensec3;
        /* NSEC3 parameters */
        dns_hash_t                      hash;
        isc_uint8_t                     flags;
        isc_uint16_t                    iterations;
        isc_uint8_t                     salt_length;
        unsigned char                   salt[DNS_NSEC3_SALTSIZE];
} rbtdb_version_t;

typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;
390
/*
 * The red-black-tree database object.  'common' is the generic dns_db_t
 * part; VALID_RBTDB() recognises an instance by common.impmagic.  The
 * member groups below are annotated with the lock that protects them.
 */
typedef struct {
        /* Unlocked. */
        dns_db_t                        common;
        /* The database lock: an rwlock or a plain mutex, chosen at build time. */
#if DNS_RBTDB_USERWLOCK
        isc_rwlock_t                    lock;
#else
        isc_mutex_t                     lock;
#endif
        isc_rwlock_t                    tree_lock;
        unsigned int                    node_lock_count;
        rbtdb_nodelock_t *              node_locks;
        dns_rbtnode_t *                 origin_node;
        dns_stats_t *                   rrsetstats; /* cache DB only */
        /* Locked by lock. */
        unsigned int                    active;
        isc_refcount_t                  references;
        unsigned int                    attributes;
        rbtdb_serial_t                  current_serial;
        rbtdb_serial_t                  least_serial;
        rbtdb_serial_t                  next_serial;
        rbtdb_version_t *               current_version;
        rbtdb_version_t *               future_version;
        rbtdb_versionlist_t             open_versions;
        isc_task_t *                    task;
        dns_dbnode_t                    *soanode;
        dns_dbnode_t                    *nsnode;

        /*
         * This is a linked list used to implement the LRU cache.  There will
         * be node_lock_count linked lists here.  Nodes in bucket 1 will be
         * placed on the linked list rdatasets[1].
         */
        rdatasetheaderlist_t            *rdatasets;

        /*%
         * Temporary storage for stale cache nodes and dynamically deleted
         * nodes that await being cleaned up.
         */
        rbtnodelist_t                   *deadnodes;

        /*
         * Heaps.  Each of these is used for TTL based expiry.
         * set_ttl() indexes this array by the node's 'locknum'.
         */
        isc_heap_t                      **heaps;

        /* Locked by tree_lock. */
        dns_rbt_t *                     tree;
        dns_rbt_t *                     nsec3;

        /* Unlocked */
        unsigned int                    quantum;
} dns_rbtdb_t;
443
444 #define RBTDB_ATTR_LOADED               0x01
445 #define RBTDB_ATTR_LOADING              0x02
446
/*%
 * Search Context
 *
 * Groups the database, the version and serial being searched, the caller's
 * options, the tree chain, zone-cut bookkeeping and the search time.
 * NOTE(review): the exact role of each flag is defined by the lookup
 * routines, which are outside this section -- confirm there.
 */
typedef struct {
        dns_rbtdb_t *           rbtdb;
        rbtdb_version_t *       rbtversion;
        rbtdb_serial_t          serial;
        unsigned int            options;
        dns_rbtnodechain_t      chain;
        isc_boolean_t           copy_name;
        isc_boolean_t           need_cleanup;
        isc_boolean_t           wild;
        dns_rbtnode_t *         zonecut;
        rdatasetheader_t *      zonecut_rdataset;
        rdatasetheader_t *      zonecut_sigrdataset;
        dns_fixedname_t         zonecut_name;
        isc_stdtime_t           now;
} rbtdb_search_t;
465
/*%
 * Load Context
 *
 * The database being loaded plus a timestamp.
 * NOTE(review): presumably 'now' is the time the load began -- confirm
 * against the loading routines.
 */
typedef struct {
        dns_rbtdb_t *           rbtdb;
        isc_stdtime_t           now;
} rbtdb_load_t;
473
474 static void rdataset_disassociate(dns_rdataset_t *rdataset);
475 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
476 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
477 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
478 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
479 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
480 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
481                                         dns_name_t *name,
482                                         dns_rdataset_t *neg,
483                                         dns_rdataset_t *negsig);
484 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
485                                         dns_name_t *name,
486                                         dns_rdataset_t *neg,
487                                         dns_rdataset_t *negsig);
488 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
489                                            dns_rdatasetadditional_t type,
490                                            dns_rdatatype_t qtype,
491                                            dns_acache_t *acache,
492                                            dns_zone_t **zonep,
493                                            dns_db_t **dbp,
494                                            dns_dbversion_t **versionp,
495                                            dns_dbnode_t **nodep,
496                                            dns_name_t *fname,
497                                            dns_message_t *msg,
498                                            isc_stdtime_t now);
499 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
500                                            dns_rdatasetadditional_t type,
501                                            dns_rdatatype_t qtype,
502                                            dns_acache_t *acache,
503                                            dns_zone_t *zone,
504                                            dns_db_t *db,
505                                            dns_dbversion_t *version,
506                                            dns_dbnode_t *node,
507                                            dns_name_t *fname);
508 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
509                                            dns_rdataset_t *rdataset,
510                                            dns_rdatasetadditional_t type,
511                                            dns_rdatatype_t qtype);
512 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
513                                               isc_stdtime_t now);
514 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
515                           isc_stdtime_t now);
516 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
517                           isc_boolean_t tree_locked);
518 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
519                           isc_stdtime_t now, isc_boolean_t tree_locked);
520 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
521                                   rdatasetheader_t *newheader);
522 static void prune_tree(isc_task_t *task, isc_event_t *event);
523 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
524 static void rdataset_expire(dns_rdataset_t *rdataset);
525
/*
 * Method table for rdatasets backed by this database.  The initializer is
 * positional, so the order must match the member order of
 * dns_rdatasetmethods_t.  Two slots are left NULL.
 * NOTE(review): presumably those are the "add noqname" / "add closest"
 * methods -- confirm against dns/rdataset.h.
 */
static dns_rdatasetmethods_t rdataset_methods = {
        rdataset_disassociate,
        rdataset_first,
        rdataset_next,
        rdataset_current,
        rdataset_clone,
        rdataset_count,
        NULL,
        rdataset_getnoqname,
        NULL,
        rdataset_getclosest,
        rdataset_getadditional,
        rdataset_setadditional,
        rdataset_putadditional,
        rdataset_settrust,
        rdataset_expire
};
543
544 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
545 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
546 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
547 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
548                                  dns_rdataset_t *rdataset);
549
/*
 * Method table for rdataset iterators; positional, so the order must match
 * dns_rdatasetitermethods_t.
 */
static dns_rdatasetitermethods_t rdatasetiter_methods = {
        rdatasetiter_destroy,
        rdatasetiter_first,
        rdatasetiter_next,
        rdatasetiter_current
};
556
/*
 * Rdataset iterator state: the generic iterator part followed by the
 * header the iterator is currently positioned on.
 */
typedef struct rbtdb_rdatasetiter {
        dns_rdatasetiter_t              common;
        rdatasetheader_t *              current;
} rbtdb_rdatasetiter_t;
561
562 static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
563 static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
564 static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
565 static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
566                                         dns_name_t *name);
567 static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
568 static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
569 static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
570                                            dns_dbnode_t **nodep,
571                                            dns_name_t *name);
572 static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
573 static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
574                                           dns_name_t *name);
575
/*
 * Method table for database iterators; positional, so the order must match
 * dns_dbiteratormethods_t.
 */
static dns_dbiteratormethods_t dbiterator_methods = {
        dbiterator_destroy,
        dbiterator_first,
        dbiterator_last,
        dbiterator_seek,
        dbiterator_prev,
        dbiterator_next,
        dbiterator_current,
        dbiterator_pause,
        dbiterator_origin
};
587
588 #define DELETION_BATCH_MAX 64
589
/*
 * Database iterator state.
 *
 * If 'paused' is ISC_TRUE, then the tree lock is not being held.
 *
 * Two node chains are kept, 'chain' and 'nsec3chain'; 'current' points at
 * one of them.  'deletions' has room for DELETION_BATCH_MAX node pointers.
 * NOTE(review): presumably 'delete' counts the entries of 'deletions' in
 * use -- confirm against the iterator routines.
 */
typedef struct rbtdb_dbiterator {
        dns_dbiterator_t                common;
        isc_boolean_t                   paused;
        isc_boolean_t                   new_origin;
        isc_rwlocktype_t                tree_locked;
        isc_result_t                    result;
        dns_fixedname_t                 name;
        dns_fixedname_t                 origin;
        dns_rbtnodechain_t              chain;
        dns_rbtnodechain_t              nsec3chain;
        dns_rbtnodechain_t              *current;
        dns_rbtnode_t                   *node;
        dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX];
        int                             delete;
        isc_boolean_t                   nsec3only;
        isc_boolean_t                   nonsec3;
} rbtdb_dbiterator_t;
610
611
/*
 * Database flavour tests: check the DNS_DBATTR_STUB / DNS_DBATTR_CACHE bit
 * in the attributes of the common dns_db_t part.
 */
#define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
#define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
614
615 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
616                        isc_event_t *event);
617 static void overmem(dns_db_t *db, isc_boolean_t overmem);
618 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
619                                isc_boolean_t *nsec3createflag);
620
621 /*%
622  * 'init_count' is used to initialize 'newheader->count', which in turn
623  * is used to determine where in the cycle the "cyclic" rrset-order starts.
624  * We don't lock this as we don't care about simultaneous updates.
625  *
626  * Note:
627  *      Both init_count and header->count can be ISC_UINT32_MAX.
628  *      The count on the returned rdataset however can't be as
629  *      that indicates that the database does not implement cyclic
630  *      processing.
631  */
632 static unsigned int init_count;
633
634 /*
635  * Locking
636  *
637  * If a routine is going to lock more than one lock in this module, then
638  * the locking must be done in the following order:
639  *
640  *      Tree Lock
641  *
642  *      Node Lock       (Only one from the set may be locked at one time by
643  *                       any caller)
644  *
645  *      Database Lock
646  *
647  * Failure to follow this hierarchy can result in deadlock.
648  */
649
650 /*
651  * Deleting Nodes
652  *
653  * For zone databases the node for the origin of the zone MUST NOT be deleted.
654  */
655
656
657 /*
658  * DB Routines
659  */
660
661 static void
662 attach(dns_db_t *source, dns_db_t **targetp) {
663         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
664
665         REQUIRE(VALID_RBTDB(rbtdb));
666
667         isc_refcount_increment(&rbtdb->references, NULL);
668
669         *targetp = source;
670 }
671
672 static void
673 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
674         dns_rbtdb_t *rbtdb = event->ev_arg;
675
676         UNUSED(task);
677
678         free_rbtdb(rbtdb, ISC_TRUE, event);
679 }
680
681 static void
682 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
683                   isc_boolean_t increment)
684 {
685         dns_rdatastatstype_t statattributes = 0;
686         dns_rdatastatstype_t base = 0;
687         dns_rdatastatstype_t type;
688
689         /* At the moment we count statistics only for cache DB */
690         INSIST(IS_CACHE(rbtdb));
691
692         if (NXDOMAIN(header))
693                 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
694         else if (RBTDB_RDATATYPE_BASE(header->type) == 0) {
695                 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
696                 base = RBTDB_RDATATYPE_EXT(header->type);
697         } else
698                 base = RBTDB_RDATATYPE_BASE(header->type);
699
700         type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
701         if (increment)
702                 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
703         else
704                 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
705 }
706
707 static void
708 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
709         int idx;
710         isc_heap_t *heap;
711         dns_ttl_t oldttl;
712
713         oldttl = header->rdh_ttl;
714         header->rdh_ttl = newttl;
715
716         if (!IS_CACHE(rbtdb))
717                 return;
718
719         /*
720          * It's possible the rbtdb is not a cache.  If this is the case,
721          * we will not have a heap, and we move on.  If we do, though,
722          * we might need to adjust things.
723          */
724         if (header->heap_index == 0 || newttl == oldttl)
725                 return;
726         idx = header->node->locknum;
727         if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
728             return;
729         heap = rbtdb->heaps[idx];
730
731         if (newttl < oldttl)
732                 isc_heap_increased(heap, header->heap_index);
733         else
734                 isc_heap_decreased(heap, header->heap_index);
735 }
736
737 /*%
738  * These functions allow the heap code to rank the priority of each
739  * element.  It returns ISC_TRUE if v1 happens "sooner" than v2.
740  */
741 static isc_boolean_t
742 ttl_sooner(void *v1, void *v2) {
743         rdatasetheader_t *h1 = v1;
744         rdatasetheader_t *h2 = v2;
745
746         if (h1->rdh_ttl < h2->rdh_ttl)
747                 return (ISC_TRUE);
748         return (ISC_FALSE);
749 }
750
751 static isc_boolean_t
752 resign_sooner(void *v1, void *v2) {
753         rdatasetheader_t *h1 = v1;
754         rdatasetheader_t *h2 = v2;
755
756         if (h1->resign < h2->resign)
757                 return (ISC_TRUE);
758         return (ISC_FALSE);
759 }
760
761 /*%
762  * This function sets the heap index into the header.
763  */
764 static void
765 set_index(void *what, unsigned int index) {
766         rdatasetheader_t *h = what;
767
768         h->heap_index = index;
769 }
770
771 /*%
772  * Work out how many nodes can be deleted in the time between two
773  * requests to the nameserver.  Smooth the resulting number and use it
774  * as a estimate for the number of nodes to be deleted in the next
775  * iteration.
776  */
777 static unsigned int
778 adjust_quantum(unsigned int old, isc_time_t *start) {
779         unsigned int pps = dns_pps;     /* packets per second */
780         unsigned int interval;
781         isc_uint64_t usecs;
782         isc_time_t end;
783         unsigned int new;
784
785         if (pps < 100)
786                 pps = 100;
787         isc_time_now(&end);
788
789         interval = 1000000 / pps;       /* interval in usec */
790         if (interval == 0)
791                 interval = 1;
792         usecs = isc_time_microdiff(&end, start);
793         if (usecs == 0) {
794                 /*
795                  * We were unable to measure the amount of time taken.
796                  * Double the nodes deleted next time.
797                  */
798                 old *= 2;
799                 if (old > 1000)
800                         old = 1000;
801                 return (old);
802         }
803         new = old * interval;
804         new /= (unsigned int)usecs;
805         if (new == 0)
806                 new = 1;
807         else if (new > 1000)
808                 new = 1000;
809
810         /* Smooth */
811         new = (new + old * 3) / 4;
812
813         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
814                       ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
815
816         return (new);
817 }
818
819 static void
820 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
821         unsigned int i;
822         isc_ondestroy_t ondest;
823         isc_result_t result;
824         char buf[DNS_NAME_FORMATSIZE];
825         isc_time_t start;
826
827         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
828                 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
829
830         REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
831         REQUIRE(rbtdb->future_version == NULL);
832
833         if (rbtdb->current_version != NULL) {
834                 unsigned int refs;
835
836                 isc_refcount_decrement(&rbtdb->current_version->references,
837                                        &refs);
838                 INSIST(refs == 0);
839                 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
840                 isc_refcount_destroy(&rbtdb->current_version->references);
841                 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
842                             sizeof(rbtdb_version_t));
843         }
844
845         /*
846          * We assume the number of remaining dead nodes is reasonably small;
847          * the overhead of unlinking all nodes here should be negligible.
848          */
849         for (i = 0; i < rbtdb->node_lock_count; i++) {
850                 dns_rbtnode_t *node;
851
852                 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
853                 while (node != NULL) {
854                         ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
855                         node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
856                 }
857         }
858
859         if (event == NULL)
860                 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
861  again:
862         if (rbtdb->tree != NULL) {
863                 isc_time_now(&start);
864                 result = dns_rbt_destroy2(&rbtdb->tree, rbtdb->quantum);
865                 if (result == ISC_R_QUOTA) {
866                         INSIST(rbtdb->task != NULL);
867                         if (rbtdb->quantum != 0)
868                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
869                                                                 &start);
870                         if (event == NULL)
871                                 event = isc_event_allocate(rbtdb->common.mctx,
872                                                            NULL,
873                                                          DNS_EVENT_FREESTORAGE,
874                                                            free_rbtdb_callback,
875                                                            rbtdb,
876                                                            sizeof(isc_event_t));
877                         if (event == NULL)
878                                 goto again;
879                         isc_task_send(rbtdb->task, &event);
880                         return;
881                 }
882                 INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
883         }
884
885         if (rbtdb->nsec3 != NULL) {
886                 isc_time_now(&start);
887                 result = dns_rbt_destroy2(&rbtdb->nsec3, rbtdb->quantum);
888                 if (result == ISC_R_QUOTA) {
889                         INSIST(rbtdb->task != NULL);
890                         if (rbtdb->quantum != 0)
891                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
892                                                                 &start);
893                         if (event == NULL)
894                                 event = isc_event_allocate(rbtdb->common.mctx,
895                                                            NULL,
896                                                          DNS_EVENT_FREESTORAGE,
897                                                            free_rbtdb_callback,
898                                                            rbtdb,
899                                                            sizeof(isc_event_t));
900                         if (event == NULL)
901                                 goto again;
902                         isc_task_send(rbtdb->task, &event);
903                         return;
904                 }
905                 INSIST(result == ISC_R_SUCCESS && rbtdb->nsec3 == NULL);
906         }
907
908         if (event != NULL)
909                 isc_event_free(&event);
910         if (log) {
911                 if (dns_name_dynamic(&rbtdb->common.origin))
912                         dns_name_format(&rbtdb->common.origin, buf,
913                                         sizeof(buf));
914                 else
915                         strcpy(buf, "<UNKNOWN>");
916                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
917                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
918                               "done free_rbtdb(%s)", buf);
919         }
920         if (dns_name_dynamic(&rbtdb->common.origin))
921                 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
922         for (i = 0; i < rbtdb->node_lock_count; i++) {
923                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
924                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
925         }
926
927         /*
928          * Clean up LRU / re-signing order lists.
929          */
930         if (rbtdb->rdatasets != NULL) {
931                 for (i = 0; i < rbtdb->node_lock_count; i++)
932                         INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
933                 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
934                             rbtdb->node_lock_count *
935                             sizeof(rdatasetheaderlist_t));
936         }
937         /*
938          * Clean up dead node buckets.
939          */
940         if (rbtdb->deadnodes != NULL) {
941                 for (i = 0; i < rbtdb->node_lock_count; i++)
942                         INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
943                 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
944                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
945         }
946         /*
947          * Clean up heap objects.
948          */
949         if (rbtdb->heaps != NULL) {
950                 for (i = 0; i < rbtdb->node_lock_count; i++)
951                         isc_heap_destroy(&rbtdb->heaps[i]);
952                 isc_mem_put(rbtdb->common.mctx, rbtdb->heaps,
953                             rbtdb->node_lock_count *
954                             sizeof(isc_heap_t *));
955         }
956
957         if (rbtdb->rrsetstats != NULL)
958                 dns_stats_detach(&rbtdb->rrsetstats);
959
960         isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
961                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
962         isc_rwlock_destroy(&rbtdb->tree_lock);
963         isc_refcount_destroy(&rbtdb->references);
964         if (rbtdb->task != NULL)
965                 isc_task_detach(&rbtdb->task);
966
967         RBTDB_DESTROYLOCK(&rbtdb->lock);
968         rbtdb->common.magic = 0;
969         rbtdb->common.impmagic = 0;
970         ondest = rbtdb->common.ondest;
971         isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
972         isc_ondestroy_notify(&ondest, rbtdb);
973 }
974
975 static inline void
976 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
977         isc_boolean_t want_free = ISC_FALSE;
978         unsigned int i;
979         unsigned int inactive = 0;
980
981         /* XXX check for open versions here */
982
983         if (rbtdb->soanode != NULL)
984                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
985         if (rbtdb->nsnode != NULL)
986                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
987
988         /*
989          * Even though there are no external direct references, there still
990          * may be nodes in use.
991          */
992         for (i = 0; i < rbtdb->node_lock_count; i++) {
993                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
994                 rbtdb->node_locks[i].exiting = ISC_TRUE;
995                 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
996                 if (isc_refcount_current(&rbtdb->node_locks[i].references)
997                     == 0) {
998                         inactive++;
999                 }
1000         }
1001
1002         if (inactive != 0) {
1003                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1004                 rbtdb->active -= inactive;
1005                 if (rbtdb->active == 0)
1006                         want_free = ISC_TRUE;
1007                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1008                 if (want_free) {
1009                         char buf[DNS_NAME_FORMATSIZE];
1010                         if (dns_name_dynamic(&rbtdb->common.origin))
1011                                 dns_name_format(&rbtdb->common.origin, buf,
1012                                                 sizeof(buf));
1013                         else
1014                                 strcpy(buf, "<UNKNOWN>");
1015                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1016                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1017                                       "calling free_rbtdb(%s)", buf);
1018                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
1019                 }
1020         }
1021 }
1022
1023 static void
1024 detach(dns_db_t **dbp) {
1025         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1026         unsigned int refs;
1027
1028         REQUIRE(VALID_RBTDB(rbtdb));
1029
1030         isc_refcount_decrement(&rbtdb->references, &refs);
1031
1032         if (refs == 0)
1033                 maybe_free_rbtdb(rbtdb);
1034
1035         *dbp = NULL;
1036 }
1037
1038 static void
1039 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1040         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1041         rbtdb_version_t *version;
1042         unsigned int refs;
1043
1044         REQUIRE(VALID_RBTDB(rbtdb));
1045
1046         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1047         version = rbtdb->current_version;
1048         isc_refcount_increment(&version->references, &refs);
1049         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1050
1051         *versionp = (dns_dbversion_t *)version;
1052 }
1053
1054 static inline rbtdb_version_t *
1055 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1056                  unsigned int references, isc_boolean_t writer)
1057 {
1058         isc_result_t result;
1059         rbtdb_version_t *version;
1060
1061         version = isc_mem_get(mctx, sizeof(*version));
1062         if (version == NULL)
1063                 return (NULL);
1064         version->serial = serial;
1065         result = isc_refcount_init(&version->references, references);
1066         if (result != ISC_R_SUCCESS) {
1067                 isc_mem_put(mctx, version, sizeof(*version));
1068                 return (NULL);
1069         }
1070         version->writer = writer;
1071         version->commit_ok = ISC_FALSE;
1072         ISC_LIST_INIT(version->changed_list);
1073         ISC_LIST_INIT(version->resigned_list);
1074         ISC_LINK_INIT(version, link);
1075
1076         return (version);
1077 }
1078
1079 static isc_result_t
1080 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1081         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1082         rbtdb_version_t *version;
1083
1084         REQUIRE(VALID_RBTDB(rbtdb));
1085         REQUIRE(versionp != NULL && *versionp == NULL);
1086         REQUIRE(rbtdb->future_version == NULL);
1087
1088         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1089         RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
1090         version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1091                                    ISC_TRUE);
1092         if (version != NULL) {
1093                 version->commit_ok = ISC_TRUE;
1094                 version->secure = rbtdb->current_version->secure;
1095                 version->havensec3 = rbtdb->current_version->havensec3;
1096                 if (version->havensec3) {
1097                         version->flags = rbtdb->current_version->flags;
1098                         version->iterations =
1099                                 rbtdb->current_version->iterations;
1100                         version->hash = rbtdb->current_version->hash;
1101                         version->salt_length =
1102                                 rbtdb->current_version->salt_length;
1103                         memcpy(version->salt, rbtdb->current_version->salt,
1104                                version->salt_length);
1105                 } else {
1106                         version->flags = 0;
1107                         version->iterations = 0;
1108                         version->hash = 0;
1109                         version->salt_length = 0;
1110                         memset(version->salt, 0, sizeof(version->salt));
1111                 }
1112                 rbtdb->next_serial++;
1113                 rbtdb->future_version = version;
1114         }
1115         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1116
1117         if (version == NULL)
1118                 return (ISC_R_NOMEMORY);
1119
1120         *versionp = version;
1121
1122         return (ISC_R_SUCCESS);
1123 }
1124
1125 static void
1126 attachversion(dns_db_t *db, dns_dbversion_t *source,
1127               dns_dbversion_t **targetp)
1128 {
1129         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1130         rbtdb_version_t *rbtversion = source;
1131         unsigned int refs;
1132
1133         REQUIRE(VALID_RBTDB(rbtdb));
1134
1135         isc_refcount_increment(&rbtversion->references, &refs);
1136         INSIST(refs > 1);
1137
1138         *targetp = rbtversion;
1139 }
1140
1141 static rbtdb_changed_t *
1142 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1143             dns_rbtnode_t *node)
1144 {
1145         rbtdb_changed_t *changed;
1146         unsigned int refs;
1147
1148         /*
1149          * Caller must be holding the node lock if its reference must be
1150          * protected by the lock.
1151          */
1152
1153         changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1154
1155         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1156
1157         REQUIRE(version->writer);
1158
1159         if (changed != NULL) {
1160                 dns_rbtnode_refincrement(node, &refs);
1161                 INSIST(refs != 0);
1162                 changed->node = node;
1163                 changed->dirty = ISC_FALSE;
1164                 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1165         } else
1166                 version->commit_ok = ISC_FALSE;
1167
1168         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1169
1170         return (changed);
1171 }
1172
1173 static void
1174 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1175                  acachectl_t *array)
1176 {
1177         unsigned int count;
1178         unsigned int i;
1179         unsigned char *raw;     /* RDATASLAB */
1180
1181         /*
1182          * The caller must be holding the corresponding node lock.
1183          */
1184
1185         if (array == NULL)
1186                 return;
1187
1188         raw = (unsigned char *)header + sizeof(*header);
1189         count = raw[0] * 256 + raw[1];
1190
1191         /*
1192          * Sanity check: since an additional cache entry has a reference to
1193          * the original DB node (in the callback arg), there should be no
1194          * acache entries when the node can be freed.
1195          */
1196         for (i = 0; i < count; i++)
1197                 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1198
1199         isc_mem_put(mctx, array, count * sizeof(acachectl_t));
1200 }
1201
1202 static inline void
1203 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1204
1205         if (dns_name_dynamic(&(*noqname)->name))
1206                 dns_name_free(&(*noqname)->name, mctx);
1207         if ((*noqname)->neg != NULL)
1208                 isc_mem_put(mctx, (*noqname)->neg,
1209                             dns_rdataslab_size((*noqname)->neg, 0));
1210         if ((*noqname)->negsig != NULL)
1211                 isc_mem_put(mctx, (*noqname)->negsig,
1212                             dns_rdataslab_size((*noqname)->negsig, 0));
1213         isc_mem_put(mctx, *noqname, sizeof(**noqname));
1214         *noqname = NULL;
1215 }
1216
1217 static inline void
1218 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1219 {
1220         ISC_LINK_INIT(h, link);
1221         h->heap_index = 0;
1222
1223 #if TRACE_HEADER
1224         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1225                 fprintf(stderr, "initialized header: %p\n", h);
1226 #else
1227         UNUSED(rbtdb);
1228 #endif
1229 }
1230
1231 static inline rdatasetheader_t *
1232 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1233 {
1234         rdatasetheader_t *h;
1235
1236         h = isc_mem_get(mctx, sizeof(*h));
1237         if (h == NULL)
1238                 return (NULL);
1239
1240 #if TRACE_HEADER
1241         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1242                 fprintf(stderr, "allocated header: %p\n", h);
1243 #endif
1244         init_rdataset(rbtdb, h);
1245         return (h);
1246 }
1247
1248 static inline void
1249 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1250 {
1251         unsigned int size;
1252         int idx;
1253
1254         if (EXISTS(rdataset) &&
1255             (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1256                 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1257         }
1258
1259         idx = rdataset->node->locknum;
1260         if (ISC_LINK_LINKED(rdataset, link)) {
1261                 INSIST(IS_CACHE(rbtdb));
1262                 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1263         }
1264         if (rdataset->heap_index != 0)
1265                 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1266         rdataset->heap_index = 0;
1267
1268         if (rdataset->noqname != NULL)
1269                 free_noqname(mctx, &rdataset->noqname);
1270         if (rdataset->closest != NULL)
1271                 free_noqname(mctx, &rdataset->closest);
1272
1273         free_acachearray(mctx, rdataset, rdataset->additional_auth);
1274         free_acachearray(mctx, rdataset, rdataset->additional_glue);
1275
1276         if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1277                 size = sizeof(*rdataset);
1278         else
1279                 size = dns_rdataslab_size((unsigned char *)rdataset,
1280                                           sizeof(*rdataset));
1281         isc_mem_put(mctx, rdataset, size);
1282 }
1283
1284 static inline void
1285 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1286         rdatasetheader_t *header, *dcurrent;
1287         isc_boolean_t make_dirty = ISC_FALSE;
1288
1289         /*
1290          * Caller must hold the node lock.
1291          */
1292
1293         /*
1294          * We set the IGNORE attribute on rdatasets with serial number
1295          * 'serial'.  When the reference count goes to zero, these rdatasets
1296          * will be cleaned up; until that time, they will be ignored.
1297          */
1298         for (header = node->data; header != NULL; header = header->next) {
1299                 if (header->serial == serial) {
1300                         header->attributes |= RDATASET_ATTR_IGNORE;
1301                         make_dirty = ISC_TRUE;
1302                 }
1303                 for (dcurrent = header->down;
1304                      dcurrent != NULL;
1305                      dcurrent = dcurrent->down) {
1306                         if (dcurrent->serial == serial) {
1307                                 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1308                                 make_dirty = ISC_TRUE;
1309                         }
1310                 }
1311         }
1312         if (make_dirty)
1313                 node->dirty = 1;
1314 }
1315
1316 static inline void
1317 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1318 {
1319         rdatasetheader_t *d, *down_next;
1320
1321         for (d = top->down; d != NULL; d = down_next) {
1322                 down_next = d->down;
1323                 free_rdataset(rbtdb, mctx, d);
1324         }
1325         top->down = NULL;
1326 }
1327
1328 static inline void
1329 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1330         rdatasetheader_t *current, *top_prev, *top_next;
1331         isc_mem_t *mctx = rbtdb->common.mctx;
1332
1333         /*
1334          * Caller must be holding the node lock.
1335          */
1336
1337         top_prev = NULL;
1338         for (current = node->data; current != NULL; current = top_next) {
1339                 top_next = current->next;
1340                 clean_stale_headers(rbtdb, mctx, current);
1341                 /*
1342                  * If current is nonexistent or stale, we can clean it up.
1343                  */
1344                 if ((current->attributes &
1345                      (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1346                         if (top_prev != NULL)
1347                                 top_prev->next = current->next;
1348                         else
1349                                 node->data = current->next;
1350                         free_rdataset(rbtdb, mctx, current);
1351                 } else
1352                         top_prev = current;
1353         }
1354         node->dirty = 0;
1355 }
1356
1357 static inline void
1358 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1359                 rbtdb_serial_t least_serial)
1360 {
1361         rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1362         rdatasetheader_t *top_prev, *top_next;
1363         isc_mem_t *mctx = rbtdb->common.mctx;
1364         isc_boolean_t still_dirty = ISC_FALSE;
1365
1366         /*
1367          * Caller must be holding the node lock.
1368          */
1369         REQUIRE(least_serial != 0);
1370
1371         top_prev = NULL;
1372         for (current = node->data; current != NULL; current = top_next) {
1373                 top_next = current->next;
1374
1375                 /*
1376                  * First, we clean up any instances of multiple rdatasets
1377                  * with the same serial number, or that have the IGNORE
1378                  * attribute.
1379                  */
1380                 dparent = current;
1381                 for (dcurrent = current->down;
1382                      dcurrent != NULL;
1383                      dcurrent = down_next) {
1384                         down_next = dcurrent->down;
1385                         INSIST(dcurrent->serial <= dparent->serial);
1386                         if (dcurrent->serial == dparent->serial ||
1387                             IGNORE(dcurrent)) {
1388                                 if (down_next != NULL)
1389                                         down_next->next = dparent;
1390                                 dparent->down = down_next;
1391                                 free_rdataset(rbtdb, mctx, dcurrent);
1392                         } else
1393                                 dparent = dcurrent;
1394                 }
1395
1396                 /*
1397                  * We've now eliminated all IGNORE datasets with the possible
1398                  * exception of current, which we now check.
1399                  */
1400                 if (IGNORE(current)) {
1401                         down_next = current->down;
1402                         if (down_next == NULL) {
1403                                 if (top_prev != NULL)
1404                                         top_prev->next = current->next;
1405                                 else
1406                                         node->data = current->next;
1407                                 free_rdataset(rbtdb, mctx, current);
1408                                 /*
1409                                  * current no longer exists, so we can
1410                                  * just continue with the loop.
1411                                  */
1412                                 continue;
1413                         } else {
1414                                 /*
1415                                  * Pull up current->down, making it the new
1416                                  * current.
1417                                  */
1418                                 if (top_prev != NULL)
1419                                         top_prev->next = down_next;
1420                                 else
1421                                         node->data = down_next;
1422                                 down_next->next = top_next;
1423                                 free_rdataset(rbtdb, mctx, current);
1424                                 current = down_next;
1425                         }
1426                 }
1427
1428                 /*
1429                  * We now try to find the first down node less than the
1430                  * least serial.
1431                  */
1432                 dparent = current;
1433                 for (dcurrent = current->down;
1434                      dcurrent != NULL;
1435                      dcurrent = down_next) {
1436                         down_next = dcurrent->down;
1437                         if (dcurrent->serial < least_serial)
1438                                 break;
1439                         dparent = dcurrent;
1440                 }
1441
1442                 /*
1443                  * If there is a such an rdataset, delete it and any older
1444                  * versions.
1445                  */
1446                 if (dcurrent != NULL) {
1447                         do {
1448                                 down_next = dcurrent->down;
1449                                 INSIST(dcurrent->serial <= least_serial);
1450                                 free_rdataset(rbtdb, mctx, dcurrent);
1451                                 dcurrent = down_next;
1452                         } while (dcurrent != NULL);
1453                         dparent->down = NULL;
1454                 }
1455
1456                 /*
1457                  * Note.  The serial number of 'current' might be less than
1458                  * least_serial too, but we cannot delete it because it is
1459                  * the most recent version, unless it is a NONEXISTENT
1460                  * rdataset.
1461                  */
1462                 if (current->down != NULL) {
1463                         still_dirty = ISC_TRUE;
1464                         top_prev = current;
1465                 } else {
1466                         /*
1467                          * If this is a NONEXISTENT rdataset, we can delete it.
1468                          */
1469                         if (NONEXISTENT(current)) {
1470                                 if (top_prev != NULL)
1471                                         top_prev->next = current->next;
1472                                 else
1473                                         node->data = current->next;
1474                                 free_rdataset(rbtdb, mctx, current);
1475                         } else
1476                                 top_prev = current;
1477                 }
1478         }
1479         if (!still_dirty)
1480                 node->dirty = 0;
1481 }
1482
1483 /*%
1484  * Clean up dead nodes.  These are nodes which have no references, and
1485  * have no data.  They are dead but we could not or chose not to delete
1486  * them when we deleted all the data at that node because we did not want
1487  * to wait for the tree write lock.
1488  *
1489  * The caller must hold a tree write lock and bucketnum'th node (write) lock.
1490  */
1491 static void
1492 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1493         dns_rbtnode_t *node;
1494         isc_result_t result;
1495         int count = 10;         /* XXXJT: should be adjustable */
1496
1497         node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1498         while (node != NULL && count > 0) {
1499                 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1500
1501                 /*
1502                  * Since we're holding a tree write lock, it should be
1503                  * impossible for this node to be referenced by others.
1504                  */
1505                 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1506                        node->data == NULL);
1507
1508                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1509                 if (node->nsec3)
1510                         result = dns_rbt_deletenode(rbtdb->nsec3, node,
1511                                                     ISC_FALSE);
1512                 else
1513                         result = dns_rbt_deletenode(rbtdb->tree, node,
1514                                                     ISC_FALSE);
1515                 if (result != ISC_R_SUCCESS)
1516                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1517                                       DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1518                                       "cleanup_dead_nodes: "
1519                                       "dns_rbt_deletenode: %s",
1520                                       isc_result_totext(result));
1521                 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1522                 count--;
1523         }
1524 }
1525
1526 /*
1527  * Caller must be holding the node lock if its reference must be protected
1528  * by the lock.
1529  */
1530 static inline void
1531 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1532         unsigned int lockrefs, noderefs;
1533         isc_refcount_t *lockref;
1534
1535         dns_rbtnode_refincrement0(node, &noderefs);
1536         if (noderefs == 1) {    /* this is the first reference to the node */
1537                 lockref = &rbtdb->node_locks[node->locknum].references;
1538                 isc_refcount_increment0(lockref, &lockrefs);
1539                 INSIST(lockrefs != 0);
1540         }
1541         INSIST(noderefs != 0);
1542 }
1543
1544 /*
1545  * This function is assumed to be called when a node is newly referenced
1546  * and can be in the deadnode list.  In that case the node must be retrieved
1547  * from the list because it is going to be used.  In addition, if the caller
1548  * happens to hold a write lock on the tree, it's a good chance to purge dead
1549  * nodes.
1550  * Note: while a new reference is gained in multiple places, there are only very
1551  * few cases where the node can be in the deadnode list (only empty nodes can
1552  * have been added to the list).
1553  */
1554 static inline void
1555 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1556                 isc_rwlocktype_t treelocktype)
1557 {
1558         isc_boolean_t need_relock = ISC_FALSE;
1559
1560         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1561         new_reference(rbtdb, node);
1562
1563         NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1564                       isc_rwlocktype_read);
1565         if (ISC_LINK_LINKED(node, deadlink))
1566                 need_relock = ISC_TRUE;
1567         else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1568                  treelocktype == isc_rwlocktype_write)
1569                 need_relock = ISC_TRUE;
1570         NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1571                         isc_rwlocktype_read);
1572         if (need_relock) {
1573                 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1574                               isc_rwlocktype_write);
1575                 if (ISC_LINK_LINKED(node, deadlink))
1576                         ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1577                                         node, deadlink);
1578                 if (treelocktype == isc_rwlocktype_write)
1579                         cleanup_dead_nodes(rbtdb, node->locknum);
1580                 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1581                                 isc_rwlocktype_write);
1582         }
1583
1584         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1585 }
1586
1587 /*
1588  * Caller must be holding the node lock; either the "strong", read or write
1589  * lock.  Note that the lock must be held even when node references are
1590  * atomically modified; in that case the decrement operation itself does not
1591  * have to be protected, but we must avoid a race condition where multiple
1592  * threads are decreasing the reference to zero simultaneously and at least
1593  * one of them is going to free the node.
1594  * This function returns ISC_TRUE if and only if the node reference decreases
1595  * to zero.
1596  */
1597 static isc_boolean_t
1598 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1599                     rbtdb_serial_t least_serial,
1600                     isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1601                     isc_boolean_t pruning)
1602 {
1603         isc_result_t result;
1604         isc_boolean_t write_locked;
1605         rbtdb_nodelock_t *nodelock;
1606         unsigned int refs, nrefs;
1607         int bucket = node->locknum;
1608         isc_boolean_t no_reference;
1609
1610         nodelock = &rbtdb->node_locks[bucket];
1611
1612         /* Handle easy and typical case first. */
1613         if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1614                 dns_rbtnode_refdecrement(node, &nrefs);
1615                 INSIST((int)nrefs >= 0);
1616                 if (nrefs == 0) {
1617                         isc_refcount_decrement(&nodelock->references, &refs);
1618                         INSIST((int)refs >= 0);
1619                 }
1620                 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1621         }
1622
1623         /* Upgrade the lock? */
1624         if (nlock == isc_rwlocktype_read) {
1625                 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1626                 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1627         }
1628         dns_rbtnode_refdecrement(node, &nrefs);
1629         INSIST((int)nrefs >= 0);
1630         if (nrefs > 0) {
1631                 /* Restore the lock? */
1632                 if (nlock == isc_rwlocktype_read)
1633                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1634                 return (ISC_FALSE);
1635         }
1636
1637         if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1638                 if (IS_CACHE(rbtdb))
1639                         clean_cache_node(rbtdb, node);
1640                 else {
1641                         if (least_serial == 0) {
1642                                 /*
1643                                  * Caller doesn't know the least serial.
1644                                  * Get it.
1645                                  */
1646                                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1647                                 least_serial = rbtdb->least_serial;
1648                                 RBTDB_UNLOCK(&rbtdb->lock,
1649                                              isc_rwlocktype_read);
1650                         }
1651                         clean_zone_node(rbtdb, node, least_serial);
1652                 }
1653         }
1654
1655         isc_refcount_decrement(&nodelock->references, &refs);
1656         INSIST((int)refs >= 0);
1657
1658         /*
1659          * XXXDCL should this only be done for cache zones?
1660          */
1661         if (node->data != NULL || node->down != NULL) {
1662                 /* Restore the lock? */
1663                 if (nlock == isc_rwlocktype_read)
1664                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1665                 return (ISC_TRUE);
1666         }
1667
1668         /*
1669          * Attempt to switch to a write lock on the tree.  If this fails,
1670          * we will add this node to a linked list of nodes in this locking
1671          * bucket which we will free later.
1672          */
1673         if (tlock != isc_rwlocktype_write) {
1674                 /*
1675                  * Locking hierarchy notwithstanding, we don't need to free
1676                  * the node lock before acquiring the tree write lock because
1677                  * we only do a trylock.
1678                  */
1679                 if (tlock == isc_rwlocktype_read)
1680                         result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1681                 else
1682                         result = isc_rwlock_trylock(&rbtdb->tree_lock,
1683                                                     isc_rwlocktype_write);
1684                 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1685                               result == ISC_R_LOCKBUSY);
1686
1687                 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1688         } else
1689                 write_locked = ISC_TRUE;
1690
1691         no_reference = ISC_TRUE;
1692         if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1693                 /*
1694                  * We can now delete the node if the reference counter is
1695                  * zero.  This should be typically the case, but a different
1696                  * thread may still gain a (new) reference just before the
1697                  * current thread locks the tree (e.g., in findnode()).
1698                  */
1699
1700                 /*
1701                  * If this node is the only one in the level it's in, deleting
1702                  * this node may recursively make its parent the only node in
1703                  * the parent level; if so, and if no one is currently using
1704                  * the parent node, this is almost the only opportunity to
1705                  * clean it up.  But the recursive cleanup is not that trivial
1706                  * since the child and parent may be in different lock buckets,
1707                  * which would cause a lock order reversal problem.  To avoid
1708                  * the trouble, we'll dispatch a separate event for batch
1709                  * cleaning.  We need to check whether we're deleting the node
1710                  * as a result of pruning to avoid infinite dispatching.
1711                  * Note: pruning happens only when a task has been set for the
1712                  * rbtdb.  If the user of the rbtdb chooses not to set a task,
1713                  * it's their responsibility to purge stale leaves (e.g. by
1714                  * periodic walk-through).
1715                  */
1716                 if (!pruning && node->parent != NULL &&
1717                     node->parent->down == node && node->left == NULL &&
1718                     node->right == NULL && rbtdb->task != NULL) {
1719                         isc_event_t *ev;
1720                         dns_db_t *db;
1721
1722                         ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1723                                                 DNS_EVENT_RBTPRUNE,
1724                                                 prune_tree, node,
1725                                                 sizeof(isc_event_t));
1726                         if (ev != NULL) {
1727                                 new_reference(rbtdb, node);
1728                                 db = NULL;
1729                                 attach((dns_db_t *)rbtdb, &db);
1730                                 ev->ev_sender = db;
1731                                 isc_task_send(rbtdb->task, &ev);
1732                                 no_reference = ISC_FALSE;
1733                         } else {
1734                                 /*
1735                                  * XXX: this is a weird situation.  We could
1736                                  * ignore this error case, but then the stale
1737                                  * node will unlikely be purged except via a
1738                                  * rare condition such as manual cleanup.  So
1739                                  * we queue it in the deadnodes list, hoping
1740                                  * the memory shortage is temporary and the node
1741                                  * will be deleted later.
1742                                  */
1743                                 isc_log_write(dns_lctx,
1744                                               DNS_LOGCATEGORY_DATABASE,
1745                                               DNS_LOGMODULE_CACHE,
1746                                               ISC_LOG_INFO,
1747                                               "decrement_reference: failed to "
1748                                               "allocate pruning event");
1749                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1750                                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1751                                                 deadlink);
1752                         }
1753                 } else {
1754                         if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1755                                 char printname[DNS_NAME_FORMATSIZE];
1756
1757                                 isc_log_write(dns_lctx,
1758                                               DNS_LOGCATEGORY_DATABASE,
1759                                               DNS_LOGMODULE_CACHE,
1760                                               ISC_LOG_DEBUG(1),
1761                                               "decrement_reference: "
1762                                               "delete from rbt: %p %s",
1763                                               node,
1764                                               dns_rbt_formatnodename(node,
1765                                                         printname,
1766                                                         sizeof(printname)));
1767                         }
1768
1769                         INSIST(!ISC_LINK_LINKED(node, deadlink));
1770                         if (node->nsec3)
1771                                 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1772                                                             ISC_FALSE);
1773                         else
1774                                 result = dns_rbt_deletenode(rbtdb->tree, node,
1775                                                             ISC_FALSE);
1776                         if (result != ISC_R_SUCCESS) {
1777                                 isc_log_write(dns_lctx,
1778                                               DNS_LOGCATEGORY_DATABASE,
1779                                               DNS_LOGMODULE_CACHE,
1780                                               ISC_LOG_WARNING,
1781                                               "decrement_reference: "
1782                                               "dns_rbt_deletenode: %s",
1783                                               isc_result_totext(result));
1784                         }
1785                 }
1786         } else if (dns_rbtnode_refcurrent(node) == 0) {
1787                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1788                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1789         } else
1790                 no_reference = ISC_FALSE;
1791
1792         /* Restore the lock? */
1793         if (nlock == isc_rwlocktype_read)
1794                 NODE_WEAKDOWNGRADE(&nodelock->lock);
1795
1796         /*
1797          * Relock a read lock, or unlock the write lock if no lock was held.
1798          */
1799         if (tlock == isc_rwlocktype_none)
1800                 if (write_locked)
1801                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1802
1803         if (tlock == isc_rwlocktype_read)
1804                 if (write_locked)
1805                         isc_rwlock_downgrade(&rbtdb->tree_lock);
1806
1807         return (no_reference);
1808 }
1809
1810 /*
1811  * Prune the tree by recursively cleaning-up single leaves.  In the worst
1812  * case, the number of iteration is the number of tree levels, which is at
1813  * most the maximum number of domain name labels, i.e, 127.  In practice, this
1814  * should be much smaller (only a few times), and even the worst case would be
1815  * acceptable for a single event.
1816  */
1817 static void
1818 prune_tree(isc_task_t *task, isc_event_t *event) {
1819         dns_rbtdb_t *rbtdb = event->ev_sender;
1820         dns_rbtnode_t *node = event->ev_arg;
1821         dns_rbtnode_t *parent;
1822         unsigned int locknum;
1823
1824         UNUSED(task);
1825
1826         isc_event_free(&event);
1827
1828         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1829         locknum = node->locknum;
1830         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1831         do {
1832                 parent = node->parent;
1833                 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1834                                     isc_rwlocktype_write, ISC_TRUE);
1835
1836                 if (parent != NULL && parent->down == NULL) {
1837                         /*
1838                          * node was the only down child of the parent and has
1839                          * just been removed.  We'll then need to examine the
1840                          * parent.  Keep the lock if possible; otherwise,
1841                          * release the old lock and acquire one for the parent.
1842                          */
1843                         if (parent->locknum != locknum) {
1844                                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1845                                             isc_rwlocktype_write);
1846                                 locknum = parent->locknum;
1847                                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1848                                           isc_rwlocktype_write);
1849                         }
1850
1851                         /*
1852                          * We need to gain a reference to the node before
1853                          * decrementing it in the next iteration.  In addition,
1854                          * if the node is in the dead-nodes list, extract it
1855                          * from the list beforehand as we do in
1856                          * reactivate_node().
1857                          */
1858                         new_reference(rbtdb, parent);
1859                         if (ISC_LINK_LINKED(parent, deadlink)) {
1860                                 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1861                                                 parent, deadlink);
1862                         }
1863                 } else
1864                         parent = NULL;
1865
1866                 node = parent;
1867         } while (node != NULL);
1868         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1869         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1870
1871         detach((dns_db_t **)&rbtdb);
1872 }
1873
1874 static inline void
1875 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1876                    rbtdb_changedlist_t *cleanup_list)
1877 {
1878         /*
1879          * Caller must be holding the database lock.
1880          */
1881
1882         rbtdb->least_serial = version->serial;
1883         *cleanup_list = version->changed_list;
1884         ISC_LIST_INIT(version->changed_list);
1885 }
1886
1887 static inline void
1888 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1889         rbtdb_changed_t *changed, *next_changed;
1890
1891         /*
1892          * If the changed record is dirty, then
1893          * an update created multiple versions of
1894          * a given rdataset.  We keep this list
1895          * until we're the least open version, at
1896          * which point it's safe to get rid of any
1897          * older versions.
1898          *
1899          * If the changed record isn't dirty, then
1900          * we don't need it anymore since we're
1901          * committing and not rolling back.
1902          *
1903          * The caller must be holding the database lock.
1904          */
1905         for (changed = HEAD(version->changed_list);
1906              changed != NULL;
1907              changed = next_changed) {
1908                 next_changed = NEXT(changed, link);
1909                 if (!changed->dirty) {
1910                         UNLINK(version->changed_list,
1911                                changed, link);
1912                         APPEND(*cleanup_list,
1913                                changed, link);
1914                 }
1915         }
1916 }
1917
1918 static void
1919 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1920         dns_rdataset_t keyset;
1921         dns_rdataset_t nsecset, signsecset;
1922         dns_rdata_t rdata = DNS_RDATA_INIT;
1923         isc_boolean_t haszonekey = ISC_FALSE;
1924         isc_boolean_t hasnsec = ISC_FALSE;
1925         isc_boolean_t hasoptbit = ISC_FALSE;
1926         isc_boolean_t nsec3createflag = ISC_FALSE;
1927         isc_result_t result;
1928
1929         dns_rdataset_init(&keyset);
1930         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1931                                      0, 0, &keyset, NULL);
1932         if (result == ISC_R_SUCCESS) {
1933                 dns_rdata_t keyrdata = DNS_RDATA_INIT;
1934                 result = dns_rdataset_first(&keyset);
1935                 while (result == ISC_R_SUCCESS) {
1936                         dns_rdataset_current(&keyset, &keyrdata);
1937                         if (dns_zonekey_iszonekey(&keyrdata)) {
1938                                 haszonekey = ISC_TRUE;
1939                                 break;
1940                         }
1941                         result = dns_rdataset_next(&keyset);
1942                 }
1943                 dns_rdataset_disassociate(&keyset);
1944         }
1945         if (!haszonekey) {
1946                 version->secure = dns_db_insecure;
1947                 version->havensec3 = ISC_FALSE;
1948                 return;
1949         }
1950
1951         dns_rdataset_init(&nsecset);
1952         dns_rdataset_init(&signsecset);
1953         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
1954                                      0, 0, &nsecset, &signsecset);
1955         if (result == ISC_R_SUCCESS) {
1956                 if (dns_rdataset_isassociated(&signsecset)) {
1957                         hasnsec = ISC_TRUE;
1958                         result = dns_rdataset_first(&nsecset);
1959                         if (result == ISC_R_SUCCESS) {
1960                                 dns_rdataset_current(&nsecset, &rdata);
1961                                 hasoptbit = dns_nsec_typepresent(&rdata,
1962                                                              dns_rdatatype_opt);
1963                         }
1964                         dns_rdataset_disassociate(&signsecset);
1965                 }
1966                 dns_rdataset_disassociate(&nsecset);
1967         }
1968
1969         setnsec3parameters(db, version, &nsec3createflag);
1970
1971         /*
1972          * Do we have a valid NSEC/NSEC3 chain?
1973          */
1974         if (version->havensec3 || (hasnsec && !hasoptbit))
1975                 version->secure = dns_db_secure;
1976         /*
1977          * Do we have a NSEC/NSEC3 chain under creation?
1978          */
1979         else if (hasoptbit || nsec3createflag)
1980                 version->secure = dns_db_partial;
1981         else
1982                 version->secure = dns_db_insecure;
1983 }
1984
1985 /*%<
1986  * Walk the origin node looking for NSEC3PARAM records.
1987  * Cache the nsec3 parameters.
1988  */
1989 static void
1990 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
1991                    isc_boolean_t *nsec3createflag)
1992 {
1993         dns_rbtnode_t *node;
1994         dns_rdata_nsec3param_t nsec3param;
1995         dns_rdata_t rdata = DNS_RDATA_INIT;
1996         isc_region_t region;
1997         isc_result_t result;
1998         rdatasetheader_t *header, *header_next;
1999         unsigned char *raw;             /* RDATASLAB */
2000         unsigned int count, length;
2001         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2002
2003         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2004         version->havensec3 = ISC_FALSE;
2005         node = rbtdb->origin_node;
2006         NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2007                   isc_rwlocktype_read);
2008         for (header = node->data;
2009              header != NULL;
2010              header = header_next) {
2011                 header_next = header->next;
2012                 do {
2013                         if (header->serial <= version->serial &&
2014                             !IGNORE(header)) {
2015                                 if (NONEXISTENT(header))
2016                                         header = NULL;
2017                                 break;
2018                         } else
2019                                 header = header->down;
2020                 } while (header != NULL);
2021
2022                 if (header != NULL &&
2023                     header->type == dns_rdatatype_nsec3param) {
2024                         /*
2025                          * Find A NSEC3PARAM with a supported algorithm.
2026                          */
2027                         raw = (unsigned char *)header + sizeof(*header);
2028                         count = raw[0] * 256 + raw[1]; /* count */
2029 #if DNS_RDATASET_FIXED
2030                         raw += count * 4 + 2;
2031 #else
2032                         raw += 2;
2033 #endif
2034                         while (count-- > 0U) {
2035                                 length = raw[0] * 256 + raw[1];
2036 #if DNS_RDATASET_FIXED
2037                                 raw += 4;
2038 #else
2039                                 raw += 2;
2040 #endif
2041                                 region.base = raw;
2042                                 region.length = length;
2043                                 raw += length;
2044                                 dns_rdata_fromregion(&rdata,
2045                                                      rbtdb->common.rdclass,
2046                                                      dns_rdatatype_nsec3param,
2047                                                      &region);
2048                                 result = dns_rdata_tostruct(&rdata,
2049                                                             &nsec3param,
2050                                                             NULL);
2051                                 INSIST(result == ISC_R_SUCCESS);
2052                                 dns_rdata_reset(&rdata);
2053
2054                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2055                                     !dns_nsec3_supportedhash(nsec3param.hash))
2056                                         continue;
2057
2058 #ifdef RFC5155_STRICT
2059                                 if (nsec3param.flags != 0)
2060                                         continue;
2061 #else
2062                                 if ((nsec3param.flags & DNS_NSEC3FLAG_CREATE)
2063                                     != 0)
2064                                         *nsec3createflag = ISC_TRUE;
2065                                 if ((nsec3param.flags & ~DNS_NSEC3FLAG_OPTOUT)
2066                                     != 0)
2067                                         continue;
2068 #endif
2069
2070                                 memcpy(version->salt, nsec3param.salt,
2071                                        nsec3param.salt_length);
2072                                 version->hash = nsec3param.hash;
2073                                 version->salt_length = nsec3param.salt_length;
2074                                 version->iterations = nsec3param.iterations;
2075                                 version->flags = nsec3param.flags;
2076                                 version->havensec3 = ISC_TRUE;
2077                                 /*
2078                                  * Look for a better algorithm than the
2079                                  * unknown test algorithm.
2080                                  */
2081                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2082                                         goto unlock;
2083                         }
2084                 }
2085         }
2086  unlock:
2087         NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2088                     isc_rwlocktype_read);
2089         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2090 }
2091
2092 static void
2093 cleanup_dead_nodes_callback(isc_task_t *task, isc_event_t *event) {
2094         dns_rbtdb_t *rbtdb = event->ev_arg;
2095         isc_boolean_t again = ISC_FALSE;
2096         unsigned int locknum;
2097         unsigned int refs;
2098
2099         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2100         for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) {
2101                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
2102                           isc_rwlocktype_write);
2103                 cleanup_dead_nodes(rbtdb, locknum);
2104                 if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL)
2105                         again = ISC_TRUE;
2106                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
2107                             isc_rwlocktype_write);
2108         }
2109         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2110         if (again)
2111                 isc_task_send(task, &event);
2112         else {
2113                 isc_event_free(&event);
2114                 isc_refcount_decrement(&rbtdb->references, &refs);
2115                 if (refs == 0)
2116                         maybe_free_rbtdb(rbtdb);
2117         }
2118 }
2119
2120 static void
2121 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2122         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2123         rbtdb_version_t *version, *cleanup_version, *least_greater;
2124         isc_boolean_t rollback = ISC_FALSE;
2125         rbtdb_changedlist_t cleanup_list;
2126         rdatasetheaderlist_t resigned_list;
2127         rbtdb_changed_t *changed, *next_changed;
2128         rbtdb_serial_t serial, least_serial;
2129         dns_rbtnode_t *rbtnode;
2130         unsigned int refs;
2131         rdatasetheader_t *header;
2132         isc_boolean_t writer;
2133
2134         REQUIRE(VALID_RBTDB(rbtdb));
2135         version = (rbtdb_version_t *)*versionp;
2136
2137         cleanup_version = NULL;
2138         ISC_LIST_INIT(cleanup_list);
2139         ISC_LIST_INIT(resigned_list);
2140
2141         isc_refcount_decrement(&version->references, &refs);
2142         if (refs > 0) {         /* typical and easy case first */
2143                 if (commit) {
2144                         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2145                         INSIST(!version->writer);
2146                         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2147                 }
2148                 goto end;
2149         }
2150
2151         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2152         serial = version->serial;
2153         writer = version->writer;
2154         if (version->writer) {
2155                 if (commit) {
2156                         unsigned cur_ref;
2157                         rbtdb_version_t *cur_version;
2158
2159                         INSIST(version->commit_ok);
2160                         INSIST(version == rbtdb->future_version);
2161                         /*
2162                          * The current version is going to be replaced.
2163                          * Release the (likely last) reference to it from the
2164                          * DB itself and unlink it from the open list.
2165                          */
2166                         cur_version = rbtdb->current_version;
2167                         isc_refcount_decrement(&cur_version->references,
2168                                                &cur_ref);
2169                         if (cur_ref == 0) {
2170                                 if (cur_version->serial == rbtdb->least_serial)
2171                                         INSIST(EMPTY(cur_version->changed_list));
2172                                 UNLINK(rbtdb->open_versions,
2173                                        cur_version, link);
2174                         }
2175                         if (EMPTY(rbtdb->open_versions)) {
2176                                 /*
2177                                  * We're going to become the least open
2178                                  * version.
2179                                  */
2180                                 make_least_version(rbtdb, version,
2181                                                    &cleanup_list);
2182                         } else {
2183                                 /*
2184                                  * Some other open version is the
2185                                  * least version.  We can't cleanup
2186                                  * records that were changed in this
2187                                  * version because the older versions
2188                                  * may still be in use by an open
2189                                  * version.
2190                                  *
2191                                  * We can, however, discard the
2192                                  * changed records for things that
2193                                  * we've added that didn't exist in
2194                                  * prior versions.
2195                                  */
2196                                 cleanup_nondirty(version, &cleanup_list);
2197                         }
2198                         /*
2199                          * If the (soon to be former) current version
2200                          * isn't being used by anyone, we can clean
2201                          * it up.
2202                          */
2203                         if (cur_ref == 0) {
2204                                 cleanup_version = cur_version;
2205                                 APPENDLIST(version->changed_list,
2206                                            cleanup_version->changed_list,
2207                                            link);
2208                         }
2209                         /*
2210                          * Become the current version.
2211                          */
2212                         version->writer = ISC_FALSE;
2213                         rbtdb->current_version = version;
2214                         rbtdb->current_serial = version->serial;
2215                         rbtdb->future_version = NULL;
2216
2217                         /*
2218                          * Keep the current version in the open list, and
2219                          * gain a reference for the DB itself (see the DB
2220                          * creation function below).  This must be the only
2221                          * case where we need to increment the counter from
2222                          * zero and need to use isc_refcount_increment0().
2223                          */
2224                         isc_refcount_increment0(&version->references,
2225                                                 &cur_ref);
2226                         INSIST(cur_ref == 1);
2227                         PREPEND(rbtdb->open_versions,
2228                                 rbtdb->current_version, link);
2229                         resigned_list = version->resigned_list;
2230                         ISC_LIST_INIT(version->resigned_list);
2231                 } else {
2232                         /*
2233                          * We're rolling back this transaction.
2234                          */
2235                         cleanup_list = version->changed_list;
2236                         ISC_LIST_INIT(version->changed_list);
2237                         resigned_list = version->resigned_list;
2238                         ISC_LIST_INIT(version->resigned_list);
2239                         rollback = ISC_TRUE;
2240                         cleanup_version = version;
2241                         rbtdb->future_version = NULL;
2242                 }
2243         } else {
2244                 if (version != rbtdb->current_version) {
2245                         /*
2246                          * There are no external or internal references
2247                          * to this version and it can be cleaned up.
2248                          */
2249                         cleanup_version = version;
2250
2251                         /*
2252                          * Find the version with the least serial
2253                          * number greater than ours.
2254                          */
2255                         least_greater = PREV(version, link);
2256                         if (least_greater == NULL)
2257                                 least_greater = rbtdb->current_version;
2258
2259                         INSIST(version->serial < least_greater->serial);
2260                         /*
2261                          * Is this the least open version?
2262                          */
2263                         if (version->serial == rbtdb->least_serial) {
2264                                 /*
2265                                  * Yes.  Install the new least open
2266                                  * version.
2267                                  */
2268                                 make_least_version(rbtdb,
2269                                                    least_greater,
2270                                                    &cleanup_list);
2271                         } else {
2272                                 /*
2273                                  * Add any unexecuted cleanups to
2274                                  * those of the least greater version.
2275                                  */
2276                                 APPENDLIST(least_greater->changed_list,
2277                                            version->changed_list,
2278                                            link);
2279                         }
2280                 } else if (version->serial == rbtdb->least_serial)
2281                         INSIST(EMPTY(version->changed_list));
2282                 UNLINK(rbtdb->open_versions, version, link);
2283         }
2284         least_serial = rbtdb->least_serial;
2285         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2286
2287         /*
2288          * Update the zone's secure status.
2289          */
2290         if (writer && commit && !IS_CACHE(rbtdb))
2291                 iszonesecure(db, version, rbtdb->origin_node);
2292
2293         if (cleanup_version != NULL) {
2294                 INSIST(EMPTY(cleanup_version->changed_list));
2295                 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2296                             sizeof(*cleanup_version));
2297         }
2298
2299         /*
2300          * Commit/rollback re-signed headers.
2301          */
2302         for (header = HEAD(resigned_list);
2303              header != NULL;
2304              header = HEAD(resigned_list)) {
2305                 nodelock_t *lock;
2306
2307                 ISC_LIST_UNLINK(resigned_list, header, link);
2308
2309                 lock = &rbtdb->node_locks[header->node->locknum].lock;
2310                 NODE_LOCK(lock, isc_rwlocktype_write);
2311                 if (rollback)
2312                         resign_insert(rbtdb, header->node->locknum, header);
2313                 decrement_reference(rbtdb, header->node, least_serial,
2314                                     isc_rwlocktype_write, isc_rwlocktype_none,
2315                                     ISC_FALSE);
2316                 NODE_UNLOCK(lock, isc_rwlocktype_write);
2317         }
2318
2319         if (!EMPTY(cleanup_list)) {
2320                 isc_event_t *event = NULL;
2321                 isc_rwlocktype_t tlock = isc_rwlocktype_none;
2322
2323                 if (rbtdb->task != NULL)
2324                         event = isc_event_allocate(rbtdb->common.mctx, NULL,
2325                                                    DNS_EVENT_RBTDEADNODES,
2326                                                    cleanup_dead_nodes_callback,
2327                                                    rbtdb, sizeof(isc_event_t));
2328                 if (event == NULL) {
2329                         /*
2330                          * We acquire a tree write lock here in order to make
2331                          * sure that stale nodes will be removed in
2332                          * decrement_reference().  If we didn't have the lock,
2333                          * those nodes could miss the chance to be removed
2334                          * until the server stops.  The write lock is
2335                          * expensive, but this event should be rare enough
2336                          * to justify the cost.
2337                          */
2338                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2339                         tlock = isc_rwlocktype_write;
2340                 }
2341
2342                 for (changed = HEAD(cleanup_list);
2343                      changed != NULL;
2344                      changed = next_changed) {
2345                         nodelock_t *lock;
2346
2347                         next_changed = NEXT(changed, link);
2348                         rbtnode = changed->node;
2349                         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2350
2351                         NODE_LOCK(lock, isc_rwlocktype_write);
2352                         /*
2353                          * This is a good opportunity to purge any dead nodes,
2354                          * so use it.
2355                          */
2356                         if (event == NULL)
2357                                 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2358
2359                         if (rollback)
2360                                 rollback_node(rbtnode, serial);
2361                         decrement_reference(rbtdb, rbtnode, least_serial,
2362                                             isc_rwlocktype_write, tlock,
2363                                             ISC_FALSE);
2364
2365                         NODE_UNLOCK(lock, isc_rwlocktype_write);
2366
2367                         isc_mem_put(rbtdb->common.mctx, changed,
2368                                     sizeof(*changed));
2369                 }
2370                 if (event != NULL) {
2371                         isc_refcount_increment(&rbtdb->references, NULL);
2372                         isc_task_send(rbtdb->task, &event);
2373                 } else
2374                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2375         }
2376
2377  end:
2378         *versionp = NULL;
2379 }
2380
2381 /*
2382  * Add the necessary magic for the wildcard name 'name'
2383  * to be found in 'rbtdb'.
2384  *
2385  * In order for wildcard matching to work correctly in
2386  * zone_find(), we must ensure that a node for the wildcarding
2387  * level exists in the database, and has its 'find_callback'
2388  * and 'wild' bits set.
2389  *
2390  * E.g. if the wildcard name is "*.sub.example." then we
2391  * must ensure that "sub.example." exists and is marked as
2392  * a wildcard level.
2393  */
2394 static isc_result_t
2395 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2396         isc_result_t result;
2397         dns_name_t foundname;
2398         dns_offsets_t offsets;
2399         unsigned int n;
2400         dns_rbtnode_t *node = NULL;
2401
2402         dns_name_init(&foundname, offsets);
2403         n = dns_name_countlabels(name);
2404         INSIST(n >= 2);
2405         n--;
2406         dns_name_getlabelsequence(name, 1, n, &foundname);
2407         result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2408         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2409                 return (result);
2410         node->nsec3 = 0;
2411         node->find_callback = 1;
2412         node->wild = 1;
2413         return (ISC_R_SUCCESS);
2414 }
2415
2416 static isc_result_t
2417 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2418         isc_result_t result;
2419         dns_name_t foundname;
2420         dns_offsets_t offsets;
2421         unsigned int n, l, i;
2422
2423         dns_name_init(&foundname, offsets);
2424         n = dns_name_countlabels(name);
2425         l = dns_name_countlabels(&rbtdb->common.origin);
2426         i = l + 1;
2427         while (i < n) {
2428                 dns_rbtnode_t *node = NULL;     /* dummy */
2429                 dns_name_getlabelsequence(name, n - i, i, &foundname);
2430                 if (dns_name_iswildcard(&foundname)) {
2431                         result = add_wildcard_magic(rbtdb, &foundname);
2432                         if (result != ISC_R_SUCCESS)
2433                                 return (result);
2434                         result = dns_rbt_addnode(rbtdb->tree, &foundname,
2435                                                  &node);
2436                         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2437                                 return (result);
2438                         node->nsec3 = 0;
2439                 }
2440                 i++;
2441         }
2442         return (ISC_R_SUCCESS);
2443 }
2444
2445 static isc_result_t
2446 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2447          dns_dbnode_t **nodep)
2448 {
2449         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2450         dns_rbtnode_t *node = NULL;
2451         dns_name_t nodename;
2452         isc_result_t result;
2453         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2454
2455         REQUIRE(VALID_RBTDB(rbtdb));
2456
2457         dns_name_init(&nodename, NULL);
2458         RWLOCK(&rbtdb->tree_lock, locktype);
2459         result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2460                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2461         if (result != ISC_R_SUCCESS) {
2462                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2463                 if (!create) {
2464                         if (result == DNS_R_PARTIALMATCH)
2465                                 result = ISC_R_NOTFOUND;
2466                         return (result);
2467                 }
2468                 /*
2469                  * It would be nice to try to upgrade the lock instead of
2470                  * unlocking then relocking.
2471                  */
2472                 locktype = isc_rwlocktype_write;
2473                 RWLOCK(&rbtdb->tree_lock, locktype);
2474                 node = NULL;
2475                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2476                 if (result == ISC_R_SUCCESS) {
2477                         dns_rbt_namefromnode(node, &nodename);
2478 #ifdef DNS_RBT_USEHASH
2479                         node->locknum = node->hashval % rbtdb->node_lock_count;
2480 #else
2481                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2482                                 rbtdb->node_lock_count;
2483 #endif
2484                         node->nsec3 = 0;
2485                         add_empty_wildcards(rbtdb, name);
2486
2487                         if (dns_name_iswildcard(name)) {
2488                                 result = add_wildcard_magic(rbtdb, name);
2489                                 if (result != ISC_R_SUCCESS) {
2490                                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2491                                         return (result);
2492                                 }
2493                         }
2494                 } else if (result != ISC_R_EXISTS) {
2495                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2496                         return (result);
2497                 }
2498         }
2499         reactivate_node(rbtdb, node, locktype);
2500         RWUNLOCK(&rbtdb->tree_lock, locktype);
2501
2502         *nodep = (dns_dbnode_t *)node;
2503
2504         return (ISC_R_SUCCESS);
2505 }
2506
2507 static isc_result_t
2508 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2509               dns_dbnode_t **nodep)
2510 {
2511         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2512         dns_rbtnode_t *node = NULL;
2513         dns_name_t nodename;
2514         isc_result_t result;
2515         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2516
2517         REQUIRE(VALID_RBTDB(rbtdb));
2518
2519         dns_name_init(&nodename, NULL);
2520         RWLOCK(&rbtdb->tree_lock, locktype);
2521         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, &node, NULL,
2522                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2523         if (result != ISC_R_SUCCESS) {
2524                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2525                 if (!create) {
2526                         if (result == DNS_R_PARTIALMATCH)
2527                                 result = ISC_R_NOTFOUND;
2528                         return (result);
2529                 }
2530                 /*
2531                  * It would be nice to try to upgrade the lock instead of
2532                  * unlocking then relocking.
2533                  */
2534                 locktype = isc_rwlocktype_write;
2535                 RWLOCK(&rbtdb->tree_lock, locktype);
2536                 node = NULL;
2537                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
2538                 if (result == ISC_R_SUCCESS) {
2539                         dns_rbt_namefromnode(node, &nodename);
2540 #ifdef DNS_RBT_USEHASH
2541                         node->locknum = node->hashval % rbtdb->node_lock_count;
2542 #else
2543                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2544                                 rbtdb->node_lock_count;
2545 #endif
2546                         node->nsec3 = 1U;
2547                 } else if (result != ISC_R_EXISTS) {
2548                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2549                         return (result);
2550                 }
2551         } else
2552                 INSIST(node->nsec3);
2553         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
2554         new_reference(rbtdb, node);
2555         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
2556         RWUNLOCK(&rbtdb->tree_lock, locktype);
2557
2558         *nodep = (dns_dbnode_t *)node;
2559
2560         return (ISC_R_SUCCESS);
2561 }
2562
2563 static isc_result_t
2564 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2565         rbtdb_search_t *search = arg;
2566         rdatasetheader_t *header, *header_next;
2567         rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2568         rdatasetheader_t *found;
2569         isc_result_t result;
2570         dns_rbtnode_t *onode;
2571
2572         /*
2573          * We only want to remember the topmost zone cut, since it's the one
2574          * that counts, so we'll just continue if we've already found a
2575          * zonecut.
2576          */
2577         if (search->zonecut != NULL)
2578                 return (DNS_R_CONTINUE);
2579
2580         found = NULL;
2581         result = DNS_R_CONTINUE;
2582         onode = search->rbtdb->origin_node;
2583
2584         NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2585                   isc_rwlocktype_read);
2586
2587         /*
2588          * Look for an NS or DNAME rdataset active in our version.
2589          */
2590         ns_header = NULL;
2591         dname_header = NULL;
2592         sigdname_header = NULL;
2593         for (header = node->data; header != NULL; header = header_next) {
2594                 header_next = header->next;
2595                 if (header->type == dns_rdatatype_ns ||
2596                     header->type == dns_rdatatype_dname ||
2597                     header->type == RBTDB_RDATATYPE_SIGDNAME) {
2598                         do {
2599                                 if (header->serial <= search->serial &&
2600                                     !IGNORE(header)) {
2601                                         /*
2602                                          * Is this a "this rdataset doesn't
2603                                          * exist" record?
2604                                          */
2605                                         if (NONEXISTENT(header))
2606                                                 header = NULL;
2607                                         break;
2608                                 } else
2609                                         header = header->down;
2610                         } while (header != NULL);
2611                         if (header != NULL) {
2612                                 if (header->type == dns_rdatatype_dname)
2613                                         dname_header = header;
2614                                 else if (header->type ==
2615                                            RBTDB_RDATATYPE_SIGDNAME)
2616                                         sigdname_header = header;
2617                                 else if (node != onode ||
2618                                          IS_STUB(search->rbtdb)) {
2619                                         /*
2620                                          * We've found an NS rdataset that
2621                                          * isn't at the origin node.  We check
2622                                          * that they're not at the origin node,
2623                                          * because otherwise we'd erroneously
2624                                          * treat the zone top as if it were
2625                                          * a delegation.
2626                                          */
2627                                         ns_header = header;
2628                                 }
2629                         }
2630                 }
2631         }
2632
2633         /*
2634          * Did we find anything?
2635          */
2636         if (dname_header != NULL) {
2637                 /*
2638                  * Note that DNAME has precedence over NS if both exist.
2639                  */
2640                 found = dname_header;
2641                 search->zonecut_sigrdataset = sigdname_header;
2642         } else if (ns_header != NULL) {
2643                 found = ns_header;
2644                 search->zonecut_sigrdataset = NULL;
2645         }
2646
2647         if (found != NULL) {
2648                 /*
2649                  * We increment the reference count on node to ensure that
2650                  * search->zonecut_rdataset will still be valid later.
2651                  */
2652                 new_reference(search->rbtdb, node);
2653                 search->zonecut = node;
2654                 search->zonecut_rdataset = found;
2655                 search->need_cleanup = ISC_TRUE;
2656                 /*
2657                  * Since we've found a zonecut, anything beneath it is
2658                  * glue and is not subject to wildcard matching, so we
2659                  * may clear search->wild.
2660                  */
2661                 search->wild = ISC_FALSE;
2662                 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2663                         /*
2664                          * If the caller does not want to find glue, then
2665                          * this is the best answer and the search should
2666                          * stop now.
2667                          */
2668                         result = DNS_R_PARTIALMATCH;
2669                 } else {
2670                         dns_name_t *zcname;
2671
2672                         /*
2673                          * The search will continue beneath the zone cut.
2674                          * This may or may not be the best match.  In case it
2675                          * is, we need to remember the node name.
2676                          */
2677                         zcname = dns_fixedname_name(&search->zonecut_name);
2678                         RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2679                                       ISC_R_SUCCESS);
2680                         search->copy_name = ISC_TRUE;
2681                 }
2682         } else {
2683                 /*
2684                  * There is no zonecut at this node which is active in this
2685                  * version.
2686                  *
2687                  * If this is a "wild" node and the caller hasn't disabled
2688                  * wildcard matching, remember that we've seen a wild node
2689                  * in case we need to go searching for wildcard matches
2690                  * later on.
2691                  */
2692                 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2693                         search->wild = ISC_TRUE;
2694         }
2695
2696         NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2697                     isc_rwlocktype_read);
2698
2699         return (result);
2700 }
2701
2702 static inline void
2703 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2704               rdatasetheader_t *header, isc_stdtime_t now,
2705               dns_rdataset_t *rdataset)
2706 {
2707         unsigned char *raw;     /* RDATASLAB */
2708
2709         /*
2710          * Caller must be holding the node reader lock.
2711          * XXXJT: technically, we need a writer lock, since we'll increment
2712          * the header count below.  However, since the actual counter value
2713          * doesn't matter, we prioritize performance here.  (We may want to
2714          * use atomic increment when available).
2715          */
2716
2717         if (rdataset == NULL)
2718                 return;
2719
2720         new_reference(rbtdb, node);
2721
2722         INSIST(rdataset->methods == NULL);      /* We must be disassociated. */
2723
2724         rdataset->methods = &rdataset_methods;
2725         rdataset->rdclass = rbtdb->common.rdclass;
2726         rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2727         rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2728         rdataset->ttl = header->rdh_ttl - now;
2729         rdataset->trust = header->trust;
2730         if (NXDOMAIN(header))
2731                 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2732         if (OPTOUT(header))
2733                 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2734         rdataset->private1 = rbtdb;
2735         rdataset->private2 = node;
2736         raw = (unsigned char *)header + sizeof(*header);
2737         rdataset->private3 = raw;
2738         rdataset->count = header->count++;
2739         if (rdataset->count == ISC_UINT32_MAX)
2740                 rdataset->count = 0;
2741
2742         /*
2743          * Reset iterator state.
2744          */
2745         rdataset->privateuint4 = 0;
2746         rdataset->private5 = NULL;
2747
2748         /*
2749          * Add noqname proof.
2750          */
2751         rdataset->private6 = header->noqname;
2752         if (rdataset->private6 != NULL)
2753                 rdataset->attributes |=  DNS_RDATASETATTR_NOQNAME;
2754         rdataset->private7 = header->closest;
2755         if (rdataset->private7 != NULL)
2756                 rdataset->attributes |=  DNS_RDATASETATTR_CLOSEST;
2757
2758         /*
2759          * Copy out re-signing information.
2760          */
2761         if (RESIGN(header)) {
2762                 rdataset->attributes |=  DNS_RDATASETATTR_RESIGN;
2763                 rdataset->resign = header->resign;
2764         } else
2765                 rdataset->resign = 0;
2766 }
2767
2768 static inline isc_result_t
2769 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2770                  dns_name_t *foundname, dns_rdataset_t *rdataset,
2771                  dns_rdataset_t *sigrdataset)
2772 {
2773         isc_result_t result;
2774         dns_name_t *zcname;
2775         rbtdb_rdatatype_t type;
2776         dns_rbtnode_t *node;
2777
2778         /*
2779          * The caller MUST NOT be holding any node locks.
2780          */
2781
2782         node = search->zonecut;
2783         type = search->zonecut_rdataset->type;
2784
2785         /*
2786          * If we have to set foundname, we do it before anything else.
2787          * If we were to set foundname after we had set nodep or bound the
2788          * rdataset, then we'd have to undo that work if dns_name_copy()
2789          * failed.  By setting foundname first, there's nothing to undo if
2790          * we have trouble.
2791          */
2792         if (foundname != NULL && search->copy_name) {
2793                 zcname = dns_fixedname_name(&search->zonecut_name);
2794                 result = dns_name_copy(zcname, foundname, NULL);
2795                 if (result != ISC_R_SUCCESS)
2796                         return (result);
2797         }
2798         if (nodep != NULL) {
2799                 /*
2800                  * Note that we don't have to increment the node's reference
2801                  * count here because we're going to use the reference we
2802                  * already have in the search block.
2803                  */
2804                 *nodep = node;
2805                 search->need_cleanup = ISC_FALSE;
2806         }
2807         if (rdataset != NULL) {
2808                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2809                           isc_rwlocktype_read);
2810                 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2811                               search->now, rdataset);
2812                 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2813                         bind_rdataset(search->rbtdb, node,
2814                                       search->zonecut_sigrdataset,
2815                                       search->now, sigrdataset);
2816                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2817                             isc_rwlocktype_read);
2818         }
2819
2820         if (type == dns_rdatatype_dname)
2821                 return (DNS_R_DNAME);
2822         return (DNS_R_DELEGATION);
2823 }
2824
2825 static inline isc_boolean_t
2826 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2827            dns_rbtnode_t *node)
2828 {
2829         unsigned char *raw;     /* RDATASLAB */
2830         unsigned int count, size;
2831         dns_name_t ns_name;
2832         isc_boolean_t valid = ISC_FALSE;
2833         dns_offsets_t offsets;
2834         isc_region_t region;
2835         rdatasetheader_t *header;
2836
2837         /*
2838          * No additional locking is required.
2839          */
2840
2841         /*
2842          * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
2843          * if it occurs at a zone cut, but is not valid below it.
2844          */
2845         if (type == dns_rdatatype_ns) {
2846                 if (node != search->zonecut) {
2847                         return (ISC_FALSE);
2848                 }
2849         } else if (type != dns_rdatatype_a &&
2850                    type != dns_rdatatype_aaaa &&
2851                    type != dns_rdatatype_a6) {
2852                 return (ISC_FALSE);
2853         }
2854
2855         header = search->zonecut_rdataset;
2856         raw = (unsigned char *)header + sizeof(*header);
2857         count = raw[0] * 256 + raw[1];
2858 #if DNS_RDATASET_FIXED
2859         raw += 2 + (4 * count);
2860 #else
2861         raw += 2;
2862 #endif
2863
2864         while (count > 0) {
2865                 count--;
2866                 size = raw[0] * 256 + raw[1];
2867 #if DNS_RDATASET_FIXED
2868                 raw += 4;
2869 #else
2870                 raw += 2;
2871 #endif
2872                 region.base = raw;
2873                 region.length = size;
2874                 raw += size;
2875                 /*
2876                  * XXX Until we have rdata structures, we have no choice but
2877                  * to directly access the rdata format.
2878                  */
2879                 dns_name_init(&ns_name, offsets);
2880                 dns_name_fromregion(&ns_name, &region);
2881                 if (dns_name_compare(&ns_name, name) == 0) {
2882                         valid = ISC_TRUE;
2883                         break;
2884                 }
2885         }
2886
2887         return (valid);
2888 }
2889
2890 static inline isc_boolean_t
2891 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2892             dns_name_t *name)
2893 {
2894         dns_fixedname_t fnext;
2895         dns_fixedname_t forigin;
2896         dns_name_t *next;
2897         dns_name_t *origin;
2898         dns_name_t prefix;
2899         dns_rbtdb_t *rbtdb;
2900         dns_rbtnode_t *node;
2901         isc_result_t result;
2902         isc_boolean_t answer = ISC_FALSE;
2903         rdatasetheader_t *header;
2904
2905         rbtdb = search->rbtdb;
2906
2907         dns_name_init(&prefix, NULL);
2908         dns_fixedname_init(&fnext);
2909         next = dns_fixedname_name(&fnext);
2910         dns_fixedname_init(&forigin);
2911         origin = dns_fixedname_name(&forigin);
2912
2913         result = dns_rbtnodechain_next(chain, NULL, NULL);
2914         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2915                 node = NULL;
2916                 result = dns_rbtnodechain_current(chain, &prefix,
2917                                                   origin, &node);
2918                 if (result != ISC_R_SUCCESS)
2919                         break;
2920                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2921                           isc_rwlocktype_read);
2922                 for (header = node->data;
2923                      header != NULL;
2924                      header = header->next) {
2925                         if (header->serial <= search->serial &&
2926                             !IGNORE(header) && EXISTS(header))
2927                                 break;
2928                 }
2929                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2930                             isc_rwlocktype_read);
2931                 if (header != NULL)
2932                         break;
2933                 result = dns_rbtnodechain_next(chain, NULL, NULL);
2934         }
2935         if (result == ISC_R_SUCCESS)
2936                 result = dns_name_concatenate(&prefix, origin, next, NULL);
2937         if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
2938                 answer = ISC_TRUE;
2939         return (answer);
2940 }
2941
2942 static inline isc_boolean_t
2943 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2944         dns_fixedname_t fnext;
2945         dns_fixedname_t forigin;
2946         dns_fixedname_t fprev;
2947         dns_name_t *next;
2948         dns_name_t *origin;
2949         dns_name_t *prev;
2950         dns_name_t name;
2951         dns_name_t rname;
2952         dns_name_t tname;
2953         dns_rbtdb_t *rbtdb;
2954         dns_rbtnode_t *node;
2955         dns_rbtnodechain_t chain;
2956         isc_boolean_t check_next = ISC_TRUE;
2957         isc_boolean_t check_prev = ISC_TRUE;
2958         isc_boolean_t answer = ISC_FALSE;
2959         isc_result_t result;
2960         rdatasetheader_t *header;
2961         unsigned int n;
2962
2963         rbtdb = search->rbtdb;
2964
2965         dns_name_init(&name, NULL);
2966         dns_name_init(&tname, NULL);
2967         dns_name_init(&rname, NULL);
2968         dns_fixedname_init(&fnext);
2969         next = dns_fixedname_name(&fnext);
2970         dns_fixedname_init(&fprev);
2971         prev = dns_fixedname_name(&fprev);
2972         dns_fixedname_init(&forigin);
2973         origin = dns_fixedname_name(&forigin);
2974
2975         /*
2976          * Find if qname is at or below a empty node.
2977          * Use our own copy of the chain.
2978          */
2979
2980         chain = search->chain;
2981         do {
2982                 node = NULL;
2983                 result = dns_rbtnodechain_current(&chain, &name,
2984                                                   origin, &node);
2985                 if (result != ISC_R_SUCCESS)
2986                         break;
2987                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2988                           isc_rwlocktype_read);
2989                 for (header = node->data;
2990                      header != NULL;
2991                      header = header->next) {
2992                         if (header->serial <= search->serial &&
2993                             !IGNORE(header) && EXISTS(header))
2994                                 break;
2995                 }
2996                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2997                             isc_rwlocktype_read);
2998                 if (header != NULL)
2999                         break;
3000                 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
3001         } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
3002         if (result == ISC_R_SUCCESS)
3003                 result = dns_name_concatenate(&name, origin, prev, NULL);
3004         if (result != ISC_R_SUCCESS)
3005                 check_prev = ISC_FALSE;
3006
3007         result = dns_rbtnodechain_next(&chain, NULL, NULL);
3008         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3009                 node = NULL;
3010                 result = dns_rbtnodechain_current(&chain, &name,
3011                                                   origin, &node);
3012                 if (result != ISC_R_SUCCESS)
3013                         break;
3014                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3015                           isc_rwlocktype_read);
3016                 for (header = node->data;
3017                      header != NULL;
3018                      header = header->next) {
3019                         if (header->serial <= search->serial &&
3020                             !IGNORE(header) && EXISTS(header))
3021                                 break;
3022                 }
3023                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3024                             isc_rwlocktype_read);
3025                 if (header != NULL)
3026                         break;
3027                 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3028         }
3029         if (result == ISC_R_SUCCESS)
3030                 result = dns_name_concatenate(&name, origin, next, NULL);
3031         if (result != ISC_R_SUCCESS)
3032                 check_next = ISC_FALSE;
3033
3034         dns_name_clone(qname, &rname);
3035
3036         /*
3037          * Remove the wildcard label to find the terminal name.
3038          */
3039         n = dns_name_countlabels(wname);
3040         dns_name_getlabelsequence(wname, 1, n - 1, &tname);
3041
3042         do {
3043                 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3044                     (check_next && dns_name_issubdomain(next, &rname))) {
3045                         answer = ISC_TRUE;
3046                         break;
3047                 }
3048                 /*
3049                  * Remove the left hand label.
3050                  */
3051                 n = dns_name_countlabels(&rname);
3052                 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3053         } while (!dns_name_equal(&rname, &tname));
3054         return (answer);
3055 }
3056
3057 static inline isc_result_t
3058 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3059               dns_name_t *qname)
3060 {
3061         unsigned int i, j;
3062         dns_rbtnode_t *node, *level_node, *wnode;
3063         rdatasetheader_t *header;
3064         isc_result_t result = ISC_R_NOTFOUND;
3065         dns_name_t name;
3066         dns_name_t *wname;
3067         dns_fixedname_t fwname;
3068         dns_rbtdb_t *rbtdb;
3069         isc_boolean_t done, wild, active;
3070         dns_rbtnodechain_t wchain;
3071
3072         /*
3073          * Caller must be holding the tree lock and MUST NOT be holding
3074          * any node locks.
3075          */
3076
3077         /*
3078          * Examine each ancestor level.  If the level's wild bit
3079          * is set, then construct the corresponding wildcard name and
3080          * search for it.  If the wildcard node exists, and is active in
3081          * this version, we're done.  If not, then we next check to see
3082          * if the ancestor is active in this version.  If so, then there
3083          * can be no possible wildcard match and again we're done.  If not,
3084          * continue the search.
3085          */
3086
3087         rbtdb = search->rbtdb;
3088         i = search->chain.level_matches;
3089         done = ISC_FALSE;
3090         node = *nodep;
3091         do {
3092                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3093                           isc_rwlocktype_read);
3094
3095                 /*
3096                  * First we try to figure out if this node is active in
3097                  * the search's version.  We do this now, even though we
3098                  * may not need the information, because it simplifies the
3099                  * locking and code flow.
3100                  */
3101                 for (header = node->data;
3102                      header != NULL;
3103                      header = header->next) {
3104                         if (header->serial <= search->serial &&
3105                             !IGNORE(header) && EXISTS(header))
3106                                 break;
3107                 }
3108                 if (header != NULL)
3109                         active = ISC_TRUE;
3110                 else
3111                         active = ISC_FALSE;
3112
3113                 if (node->wild)
3114                         wild = ISC_TRUE;
3115                 else
3116                         wild = ISC_FALSE;
3117
3118                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3119                             isc_rwlocktype_read);
3120
3121                 if (wild) {
3122                         /*
3123                          * Construct the wildcard name for this level.
3124                          */
3125                         dns_name_init(&name, NULL);
3126                         dns_rbt_namefromnode(node, &name);
3127                         dns_fixedname_init(&fwname);
3128                         wname = dns_fixedname_name(&fwname);
3129                         result = dns_name_concatenate(dns_wildcardname, &name,
3130                                                       wname, NULL);
3131                         j = i;
3132                         while (result == ISC_R_SUCCESS && j != 0) {
3133                                 j--;
3134                                 level_node = search->chain.levels[j];
3135                                 dns_name_init(&name, NULL);
3136                                 dns_rbt_namefromnode(level_node, &name);
3137                                 result = dns_name_concatenate(wname,
3138                                                               &name,
3139                                                               wname,
3140                                                               NULL);
3141                         }
3142                         if (result != ISC_R_SUCCESS)
3143                                 break;
3144
3145                         wnode = NULL;
3146                         dns_rbtnodechain_init(&wchain, NULL);
3147                         result = dns_rbt_findnode(rbtdb->tree, wname,
3148                                                   NULL, &wnode, &wchain,
3149                                                   DNS_RBTFIND_EMPTYDATA,
3150                                                   NULL, NULL);
3151                         if (result == ISC_R_SUCCESS) {
3152                                 nodelock_t *lock;
3153
3154                                 /*
3155                                  * We have found the wildcard node.  If it
3156                                  * is active in the search's version, we're
3157                                  * done.
3158                                  */
3159                                 lock = &rbtdb->node_locks[wnode->locknum].lock;
3160                                 NODE_LOCK(lock, isc_rwlocktype_read);
3161                                 for (header = wnode->data;
3162                                      header != NULL;
3163                                      header = header->next) {
3164                                         if (header->serial <= search->serial &&
3165                                             !IGNORE(header) && EXISTS(header))
3166                                                 break;
3167                                 }
3168                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3169                                 if (header != NULL ||
3170                                     activeempty(search, &wchain, wname)) {
3171                                         if (activeemtpynode(search, qname,
3172                                                             wname)) {
3173                                                 return (ISC_R_NOTFOUND);
3174                                         }
3175                                         /*
3176                                          * The wildcard node is active!
3177                                          *
3178                                          * Note: result is still ISC_R_SUCCESS
3179                                          * so we don't have to set it.
3180                                          */
3181                                         *nodep = wnode;
3182                                         break;
3183                                 }
3184                         } else if (result != ISC_R_NOTFOUND &&
3185                                    result != DNS_R_PARTIALMATCH) {
3186                                 /*
3187                                  * An error has occurred.  Bail out.
3188                                  */
3189                                 break;
3190                         }
3191                 }
3192
3193                 if (active) {
3194                         /*
3195                          * The level node is active.  Any wildcarding
3196                          * present at higher levels has no
3197                          * effect and we're done.
3198                          */
3199                         result = ISC_R_NOTFOUND;
3200                         break;
3201                 }
3202
3203                 if (i > 0) {
3204                         i--;
3205                         node = search->chain.levels[i];
3206                 } else
3207                         done = ISC_TRUE;
3208         } while (!done);
3209
3210         return (result);
3211 }
3212
3213 static isc_boolean_t
3214 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3215 {
3216         dns_rdata_t rdata = DNS_RDATA_INIT;
3217         dns_rdata_nsec3_t nsec3;
3218         unsigned char *raw;                     /* RDATASLAB */
3219         unsigned int rdlen, count;
3220         isc_region_t region;
3221         isc_result_t result;
3222
3223         REQUIRE(header->type == dns_rdatatype_nsec3);
3224
3225         raw = (unsigned char *)header + sizeof(*header);
3226         count = raw[0] * 256 + raw[1]; /* count */
3227 #if DNS_RDATASET_FIXED
3228         raw += count * 4 + 2;
3229 #else
3230         raw += 2;
3231 #endif
3232         while (count-- > 0) {
3233                 rdlen = raw[0] * 256 + raw[1];
3234 #if DNS_RDATASET_FIXED
3235                 raw += 4;
3236 #else
3237                 raw += 2;
3238 #endif
3239                 region.base = raw;
3240                 region.length = rdlen;
3241                 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3242                                      dns_rdatatype_nsec3, &region);
3243                 raw += rdlen;
3244                 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3245                 INSIST(result == ISC_R_SUCCESS);
3246                 if (nsec3.hash == search->rbtversion->hash &&
3247                     nsec3.iterations == search->rbtversion->iterations &&
3248                     nsec3.salt_length == search->rbtversion->salt_length &&
3249                     memcmp(nsec3.salt, search->rbtversion->salt,
3250                            nsec3.salt_length) == 0)
3251                         return (ISC_TRUE);
3252                 dns_rdata_reset(&rdata);
3253         }
3254         return (ISC_FALSE);
3255 }
3256
3257 /*
3258  * Find node of the NSEC/NSEC3 record that is 'name'.
3259  */
3260 static inline isc_result_t
3261 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3262                   dns_name_t *foundname, dns_rdataset_t *rdataset,
3263                   dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3264                   dns_db_secure_t secure)
3265 {
3266         dns_rbtnode_t *node;
3267         rdatasetheader_t *header, *header_next, *found, *foundsig;
3268         isc_boolean_t empty_node;
3269         isc_result_t result;
3270         dns_fixedname_t fname, forigin;
3271         dns_name_t *name, *origin;
3272         dns_rdatatype_t type;
3273         rbtdb_rdatatype_t sigtype;
3274         isc_boolean_t wraps;
3275         isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3276
3277         if (tree == search->rbtdb->nsec3) {
3278                 type = dns_rdatatype_nsec3;
3279                 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3280                 wraps = ISC_TRUE;
3281         } else {
3282                 type = dns_rdatatype_nsec;
3283                 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3284                 wraps = ISC_FALSE;
3285         }
3286
3287  again:
3288         do {
3289                 node = NULL;
3290                 dns_fixedname_init(&fname);
3291                 name = dns_fixedname_name(&fname);
3292                 dns_fixedname_init(&forigin);
3293                 origin = dns_fixedname_name(&forigin);
3294                 result = dns_rbtnodechain_current(&search->chain, name,
3295                                                   origin, &node);
3296                 if (result != ISC_R_SUCCESS)
3297                         return (result);
3298                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3299                           isc_rwlocktype_read);
3300                 found = NULL;
3301                 foundsig = NULL;
3302                 empty_node = ISC_TRUE;
3303                 for (header = node->data;
3304                      header != NULL;
3305                      header = header_next) {
3306                         header_next = header->next;
3307                         /*
3308                          * Look for an active, extant NSEC or RRSIG NSEC.
3309                          */
3310                         do {
3311                                 if (header->serial <= search->serial &&
3312                                     !IGNORE(header)) {
3313                                         /*
3314                                          * Is this a "this rdataset doesn't
3315                                          * exist" record?
3316                                          */
3317                                         if (NONEXISTENT(header))
3318                                                 header = NULL;
3319                                         break;
3320                                 } else
3321                                         header = header->down;
3322                         } while (header != NULL);
3323                         if (header != NULL) {
3324                                 /*
3325                                  * We now know that there is at least one
3326                                  * active rdataset at this node.
3327                                  */
3328                                 empty_node = ISC_FALSE;
3329                                 if (header->type == type) {
3330                                         found = header;
3331                                         if (foundsig != NULL)
3332                                                 break;
3333                                 } else if (header->type == sigtype) {
3334                                         foundsig = header;
3335                                         if (found != NULL)
3336                                                 break;
3337                                 }
3338                         }
3339                 }
3340                 if (!empty_node) {
3341                         if (found != NULL && search->rbtversion->havensec3 &&
3342                             found->type == dns_rdatatype_nsec3 &&
3343                             !matchparams(found, search)) {
3344                                 empty_node = ISC_TRUE;
3345                                 found = NULL;
3346                                 foundsig = NULL;
3347                                 result = dns_rbtnodechain_prev(&search->chain,
3348                                                                NULL, NULL);
3349                         } else if (found != NULL &&
3350                                    (foundsig != NULL || !need_sig))
3351                         {
3352                                 /*
3353                                  * We've found the right NSEC/NSEC3 record.
3354                                  *
3355                                  * Note: for this to really be the right
3356                                  * NSEC record, it's essential that the NSEC
3357                                  * records of any nodes obscured by a zone
3358                                  * cut have been removed; we assume this is
3359                                  * the case.
3360                                  */
3361                                 result = dns_name_concatenate(name, origin,
3362                                                               foundname, NULL);
3363                                 if (result == ISC_R_SUCCESS) {
3364                                         if (nodep != NULL) {
3365                                                 new_reference(search->rbtdb,
3366                                                               node);
3367                                                 *nodep = node;
3368                                         }
3369                                         bind_rdataset(search->rbtdb, node,
3370                                                       found, search->now,
3371                                                       rdataset);
3372                                         if (foundsig != NULL)
3373                                                 bind_rdataset(search->rbtdb,
3374                                                               node,
3375                                                               foundsig,
3376                                                               search->now,
3377                                                               sigrdataset);
3378                                 }
3379                         } else if (found == NULL && foundsig == NULL) {
3380                                 /*
3381                                  * This node is active, but has no NSEC or
3382                                  * RRSIG NSEC.  That means it's glue or
3383                                  * other obscured zone data that isn't
3384                                  * relevant for our search.  Treat the
3385                                  * node as if it were empty and keep looking.
3386                                  */
3387                                 empty_node = ISC_TRUE;
3388                                 result = dns_rbtnodechain_prev(&search->chain,
3389                                                                NULL, NULL);
3390                         } else {
3391                                 /*
3392                                  * We found an active node, but either the
3393                                  * NSEC or the RRSIG NSEC is missing.  This
3394                                  * shouldn't happen.
3395                                  */
3396                                 result = DNS_R_BADDB;
3397                         }
3398                 } else {
3399                         /*
3400                          * This node isn't active.  We've got to keep
3401                          * looking.
3402                          */
3403                         result = dns_rbtnodechain_prev(&search->chain, NULL,
3404                                                        NULL);
3405                 }
3406                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3407                             isc_rwlocktype_read);
3408         } while (empty_node && result == ISC_R_SUCCESS);
3409
3410         if (result == ISC_R_NOMORE && wraps) {
3411                 result = dns_rbtnodechain_last(&search->chain, tree,
3412                                                NULL, NULL);
3413                 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3414                         wraps = ISC_FALSE;
3415                         goto again;
3416                 }
3417         }
3418
3419         /*
3420          * If the result is ISC_R_NOMORE, then we got to the beginning of
3421          * the database and didn't find a NSEC record.  This shouldn't
3422          * happen.
3423          */
3424         if (result == ISC_R_NOMORE)
3425                 result = DNS_R_BADDB;
3426
3427         return (result);
3428 }
3429
3430 static isc_result_t
3431 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3432           dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3433           dns_dbnode_t **nodep, dns_name_t *foundname,
3434           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3435 {
3436         dns_rbtnode_t *node = NULL;
3437         isc_result_t result;
3438         rbtdb_search_t search;
3439         isc_boolean_t cname_ok = ISC_TRUE;
3440         isc_boolean_t close_version = ISC_FALSE;
3441         isc_boolean_t maybe_zonecut = ISC_FALSE;
3442         isc_boolean_t at_zonecut = ISC_FALSE;
3443         isc_boolean_t wild;
3444         isc_boolean_t empty_node;
3445         rdatasetheader_t *header, *header_next, *found, *nsecheader;
3446         rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3447         rbtdb_rdatatype_t sigtype;
3448         isc_boolean_t active;
3449         dns_rbtnodechain_t chain;
3450         nodelock_t *lock;
3451         dns_rbt_t *tree;
3452
3453         search.rbtdb = (dns_rbtdb_t *)db;
3454
3455         REQUIRE(VALID_RBTDB(search.rbtdb));
3456
3457         /*
3458          * We don't care about 'now'.
3459          */
3460         UNUSED(now);
3461
3462         /*
3463          * If the caller didn't supply a version, attach to the current
3464          * version.
3465          */
3466         if (version == NULL) {
3467                 currentversion(db, &version);
3468                 close_version = ISC_TRUE;
3469         }
3470
3471         search.rbtversion = version;
3472         search.serial = search.rbtversion->serial;
3473         search.options = options;
3474         search.copy_name = ISC_FALSE;
3475         search.need_cleanup = ISC_FALSE;
3476         search.wild = ISC_FALSE;
3477         search.zonecut = NULL;
3478         dns_fixedname_init(&search.zonecut_name);
3479         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3480         search.now = 0;
3481
3482         /*
3483          * 'wild' will be true iff. we've matched a wildcard.
3484          */
3485         wild = ISC_FALSE;
3486
3487         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3488
3489         /*
3490          * Search down from the root of the tree.  If, while going down, we
3491          * encounter a callback node, zone_zonecut_callback() will search the
3492          * rdatasets at the zone cut for active DNAME or NS rdatasets.
3493          */
3494         tree =  (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3495                                                          search.rbtdb->tree;
3496         result = dns_rbt_findnode(tree, name, foundname, &node,
3497                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
3498                                   zone_zonecut_callback, &search);
3499
3500         if (result == DNS_R_PARTIALMATCH) {
3501         partial_match:
3502                 if (search.zonecut != NULL) {
3503                     result = setup_delegation(&search, nodep, foundname,
3504                                               rdataset, sigrdataset);
3505                     goto tree_exit;
3506                 }
3507
3508                 if (search.wild) {
3509                         /*
3510                          * At least one of the levels in the search chain
3511                          * potentially has a wildcard.  For each such level,
3512                          * we must see if there's a matching wildcard active
3513                          * in the current version.
3514                          */
3515                         result = find_wildcard(&search, &node, name);
3516                         if (result == ISC_R_SUCCESS) {
3517                                 result = dns_name_copy(name, foundname, NULL);
3518                                 if (result != ISC_R_SUCCESS)
3519                                         goto tree_exit;
3520                                 wild = ISC_TRUE;
3521                                 goto found;
3522                         }
3523                         else if (result != ISC_R_NOTFOUND)
3524                                 goto tree_exit;
3525                 }
3526
3527                 chain = search.chain;
3528                 active = activeempty(&search, &chain, name);
3529
3530                 /*
3531                  * If we're here, then the name does not exist, is not
3532                  * beneath a zonecut, and there's no matching wildcard.
3533                  */
3534                 if ((search.rbtversion->secure == dns_db_secure &&
3535                      !search.rbtversion->havensec3) ||
3536                     (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3537                     (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3538                 {
3539                         result = find_closest_nsec(&search, nodep, foundname,
3540                                                    rdataset, sigrdataset, tree,
3541                                                    search.rbtversion->secure);
3542                         if (result == ISC_R_SUCCESS)
3543                                 result = active ? DNS_R_EMPTYNAME :
3544                                                   DNS_R_NXDOMAIN;
3545                 } else
3546                         result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3547                 goto tree_exit;
3548         } else if (result != ISC_R_SUCCESS)
3549                 goto tree_exit;
3550
3551  found:
3552         /*
3553          * We have found a node whose name is the desired name, or we
3554          * have matched a wildcard.
3555          */
3556
3557         if (search.zonecut != NULL) {
3558                 /*
3559                  * If we're beneath a zone cut, we don't want to look for
3560                  * CNAMEs because they're not legitimate zone glue.
3561                  */
3562                 cname_ok = ISC_FALSE;
3563         } else {
3564                 /*
3565                  * The node may be a zone cut itself.  If it might be one,
3566                  * make sure we check for it later.
3567                  *
3568                  * DS records live above the zone cut in ordinary zone so
3569                  * we want to ignore any referral.
3570                  *
3571                  * Stub zones don't have anything "above" the delegation so
3572                  * we always return a referral.
3573                  */
3574                 if (node->find_callback &&
3575                     ((node != search.rbtdb->origin_node &&
3576                       !dns_rdatatype_atparent(type)) ||
3577                      IS_STUB(search.rbtdb)))
3578                         maybe_zonecut = ISC_TRUE;
3579         }
3580
3581         /*
3582          * Certain DNSSEC types are not subject to CNAME matching
3583          * (RFC4035, section 2.5 and RFC3007).
3584          *
3585          * We don't check for RRSIG, because we don't store RRSIG records
3586          * directly.
3587          */
3588         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3589                 cname_ok = ISC_FALSE;
3590
3591         /*
3592          * We now go looking for rdata...
3593          */
3594
3595         lock = &search.rbtdb->node_locks[node->locknum].lock;
3596         NODE_LOCK(lock, isc_rwlocktype_read);
3597
3598         found = NULL;
3599         foundsig = NULL;
3600         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3601         nsecheader = NULL;
3602         nsecsig = NULL;
3603         cnamesig = NULL;
3604         empty_node = ISC_TRUE;
3605         for (header = node->data; header != NULL; header = header_next) {
3606                 header_next = header->next;
3607                 /*
3608                  * Look for an active, extant rdataset.
3609                  */
3610                 do {
3611                         if (header->serial <= search.serial &&
3612                             !IGNORE(header)) {
3613                                 /*
3614                                  * Is this a "this rdataset doesn't
3615                                  * exist" record?
3616                                  */
3617                                 if (NONEXISTENT(header))
3618                                         header = NULL;
3619                                 break;
3620                         } else
3621                                 header = header->down;
3622                 } while (header != NULL);
3623                 if (header != NULL) {
3624                         /*
3625                          * We now know that there is at least one active
3626                          * rdataset at this node.
3627                          */
3628                         empty_node = ISC_FALSE;
3629
3630                         /*
3631                          * Do special zone cut handling, if requested.
3632                          */
3633                         if (maybe_zonecut &&
3634                             header->type == dns_rdatatype_ns) {
3635                                 /*
3636                                  * We increment the reference count on node to
3637                                  * ensure that search->zonecut_rdataset will
3638                                  * still be valid later.
3639                                  */
3640                                 new_reference(search.rbtdb, node);
3641                                 search.zonecut = node;
3642                                 search.zonecut_rdataset = header;
3643                                 search.zonecut_sigrdataset = NULL;
3644                                 search.need_cleanup = ISC_TRUE;
3645                                 maybe_zonecut = ISC_FALSE;
3646                                 at_zonecut = ISC_TRUE;
3647                                 /*
3648                                  * It is not clear if KEY should still be
3649                                  * allowed at the parent side of the zone
3650                                  * cut or not.  It is needed for RFC3007
3651                                  * validated updates.
3652                                  */
3653                                 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3654                                     && type != dns_rdatatype_nsec
3655                                     && type != dns_rdatatype_key) {
3656                                         /*
3657                                          * Glue is not OK, but any answer we
3658                                          * could return would be glue.  Return
3659                                          * the delegation.
3660                                          */
3661                                         found = NULL;
3662                                         break;
3663                                 }
3664                                 if (found != NULL && foundsig != NULL)
3665                                         break;
3666                         }
3667
3668
3669                         /*
3670                          * If the NSEC3 record doesn't match the chain
3671                          * we are using behave as if it isn't here.
3672                          */
3673                         if (header->type == dns_rdatatype_nsec3 &&
3674                            !matchparams(header, &search)) {
3675                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3676                                 goto partial_match;
3677                         }
3678                         /*
3679                          * If we found a type we were looking for,
3680                          * remember it.
3681                          */
3682                         if (header->type == type ||
3683                             type == dns_rdatatype_any ||
3684                             (header->type == dns_rdatatype_cname &&
3685                              cname_ok)) {
3686                                 /*
3687                                  * We've found the answer!
3688                                  */
3689                                 found = header;
3690                                 if (header->type == dns_rdatatype_cname &&
3691                                     cname_ok) {
3692                                         /*
3693                                          * We may be finding a CNAME instead
3694                                          * of the desired type.
3695                                          *
3696                                          * If we've already got the CNAME RRSIG,
3697                                          * use it, otherwise change sigtype
3698                                          * so that we find it.
3699                                          */
3700                                         if (cnamesig != NULL)
3701                                                 foundsig = cnamesig;
3702                                         else
3703                                                 sigtype =
3704                                                     RBTDB_RDATATYPE_SIGCNAME;
3705                                 }
3706                                 /*
3707                                  * If we've got all we need, end the search.
3708                                  */
3709                                 if (!maybe_zonecut && foundsig != NULL)
3710                                         break;
3711                         } else if (header->type == sigtype) {
3712                                 /*
3713                                  * We've found the RRSIG rdataset for our
3714                                  * target type.  Remember it.
3715                                  */
3716                                 foundsig = header;
3717                                 /*
3718                                  * If we've got all we need, end the search.
3719                                  */
3720                                 if (!maybe_zonecut && found != NULL)
3721                                         break;
3722                         } else if (header->type == dns_rdatatype_nsec &&
3723                                    !search.rbtversion->havensec3) {
3724                                 /*
3725                                  * Remember a NSEC rdataset even if we're
3726                                  * not specifically looking for it, because
3727                                  * we might need it later.
3728                                  */
3729                                 nsecheader = header;
3730                         } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3731                                    !search.rbtversion->havensec3) {
3732                                 /*
3733                                  * If we need the NSEC rdataset, we'll also
3734                                  * need its signature.
3735                                  */
3736                                 nsecsig = header;
3737                         } else if (cname_ok &&
3738                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
3739                                 /*
3740                                  * If we get a CNAME match, we'll also need
3741                                  * its signature.
3742                                  */
3743                                 cnamesig = header;
3744                         }
3745                 }
3746         }
3747
3748         if (empty_node) {
3749                 /*
3750                  * We have an exact match for the name, but there are no
3751                  * active rdatasets in the desired version.  That means that
3752                  * this node doesn't exist in the desired version, and that
3753                  * we really have a partial match.
3754                  */
3755                 if (!wild) {
3756                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3757                         goto partial_match;
3758                 }
3759         }
3760
3761         /*
3762          * If we didn't find what we were looking for...
3763          */
3764         if (found == NULL) {
3765                 if (search.zonecut != NULL) {
3766                         /*
3767                          * We were trying to find glue at a node beneath a
3768                          * zone cut, but didn't.
3769                          *
3770                          * Return the delegation.
3771                          */
3772                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3773                         result = setup_delegation(&search, nodep, foundname,
3774                                                   rdataset, sigrdataset);
3775                         goto tree_exit;
3776                 }
3777                 /*
3778                  * The desired type doesn't exist.
3779                  */
3780                 result = DNS_R_NXRRSET;
3781                 if (search.rbtversion->secure == dns_db_secure &&
3782                     !search.rbtversion->havensec3 &&
3783                     (nsecheader == NULL || nsecsig == NULL)) {
3784                         /*
3785                          * The zone is secure but there's no NSEC,
3786                          * or the NSEC has no signature!
3787                          */
3788                         if (!wild) {
3789                                 result = DNS_R_BADDB;
3790                                 goto node_exit;
3791                         }
3792
3793                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3794                         result = find_closest_nsec(&search, nodep, foundname,
3795                                                    rdataset, sigrdataset,
3796                                                    search.rbtdb->tree,
3797                                                    search.rbtversion->secure);
3798                         if (result == ISC_R_SUCCESS)
3799                                 result = DNS_R_EMPTYWILD;
3800                         goto tree_exit;
3801                 }
3802                 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3803                     nsecheader == NULL)
3804                 {
3805                         /*
3806                          * There's no NSEC record, and we were told
3807                          * to find one.
3808                          */
3809                         result = DNS_R_BADDB;
3810                         goto node_exit;
3811                 }
3812                 if (nodep != NULL) {
3813                         new_reference(search.rbtdb, node);
3814                         *nodep = node;
3815                 }
3816                 if ((search.rbtversion->secure == dns_db_secure &&
3817                      !search.rbtversion->havensec3) ||
3818                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
3819                 {
3820                         bind_rdataset(search.rbtdb, node, nsecheader,
3821                                       0, rdataset);
3822                         if (nsecsig != NULL)
3823                                 bind_rdataset(search.rbtdb, node,
3824                                               nsecsig, 0, sigrdataset);
3825                 }
3826                 if (wild)
3827                         foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3828                 goto node_exit;
3829         }
3830
3831         /*
3832          * We found what we were looking for, or we found a CNAME.
3833          */
3834
3835         if (type != found->type &&
3836             type != dns_rdatatype_any &&
3837             found->type == dns_rdatatype_cname) {
3838                 /*
3839                  * We weren't doing an ANY query and we found a CNAME instead
3840                  * of the type we were looking for, so we need to indicate
3841                  * that result to the caller.
3842                  */
3843                 result = DNS_R_CNAME;
3844         } else if (search.zonecut != NULL) {
3845                 /*
3846                  * If we're beneath a zone cut, we must indicate that the
3847                  * result is glue, unless we're actually at the zone cut
3848                  * and the type is NSEC or KEY.
3849                  */
3850                 if (search.zonecut == node) {
3851                         /*
3852                          * It is not clear if KEY should still be
3853                          * allowed at the parent side of the zone
3854                          * cut or not.  It is needed for RFC3007
3855                          * validated updates.
3856                          */
3857                         if (type == dns_rdatatype_nsec ||
3858                             type == dns_rdatatype_nsec3 ||
3859                             type == dns_rdatatype_key)
3860                                 result = ISC_R_SUCCESS;
3861                         else if (type == dns_rdatatype_any)
3862                                 result = DNS_R_ZONECUT;
3863                         else
3864                                 result = DNS_R_GLUE;
3865                 } else
3866                         result = DNS_R_GLUE;
3867                 /*
3868                  * We might have found data that isn't glue, but was occluded
3869                  * by a dynamic update.  If the caller cares about this, they
3870                  * will have told us to validate glue.
3871                  *
3872                  * XXX We should cache the glue validity state!
3873                  */
3874                 if (result == DNS_R_GLUE &&
3875                     (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
3876                     !valid_glue(&search, foundname, type, node)) {
3877                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3878                         result = setup_delegation(&search, nodep, foundname,
3879                                                   rdataset, sigrdataset);
3880                     goto tree_exit;
3881                 }
3882         } else {
3883                 /*
3884                  * An ordinary successful query!
3885                  */
3886                 result = ISC_R_SUCCESS;
3887         }
3888
3889         if (nodep != NULL) {
3890                 if (!at_zonecut)
3891                         new_reference(search.rbtdb, node);
3892                 else
3893                         search.need_cleanup = ISC_FALSE;
3894                 *nodep = node;
3895         }
3896
3897         if (type != dns_rdatatype_any) {
3898                 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
3899                 if (foundsig != NULL)
3900                         bind_rdataset(search.rbtdb, node, foundsig, 0,
3901                                       sigrdataset);
3902         }
3903
3904         if (wild)
3905                 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3906
3907  node_exit:
3908         NODE_UNLOCK(lock, isc_rwlocktype_read);
3909
3910  tree_exit:
3911         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3912
3913         /*
3914          * If we found a zonecut but aren't going to use it, we have to
3915          * let go of it.
3916          */
3917         if (search.need_cleanup) {
3918                 node = search.zonecut;
3919                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3920
3921                 NODE_LOCK(lock, isc_rwlocktype_read);
3922                 decrement_reference(search.rbtdb, node, 0,
3923                                     isc_rwlocktype_read, isc_rwlocktype_none,
3924                                     ISC_FALSE);
3925                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3926         }
3927
3928         if (close_version)
3929                 closeversion(db, &version, ISC_FALSE);
3930
3931         dns_rbtnodechain_reset(&search.chain);
3932
3933         return (result);
3934 }
3935
3936 static isc_result_t
3937 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
3938                  isc_stdtime_t now, dns_dbnode_t **nodep,
3939                  dns_name_t *foundname,
3940                  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3941 {
3942         UNUSED(db);
3943         UNUSED(name);
3944         UNUSED(options);
3945         UNUSED(now);
3946         UNUSED(nodep);
3947         UNUSED(foundname);
3948         UNUSED(rdataset);
3949         UNUSED(sigrdataset);
3950
3951         FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
3952
3953         return (ISC_R_NOTIMPLEMENTED);
3954 }
3955
3956 static isc_result_t
3957 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
3958         rbtdb_search_t *search = arg;
3959         rdatasetheader_t *header, *header_prev, *header_next;
3960         rdatasetheader_t *dname_header, *sigdname_header;
3961         isc_result_t result;
3962         nodelock_t *lock;
3963         isc_rwlocktype_t locktype;
3964
3965         /* XXX comment */
3966
3967         REQUIRE(search->zonecut == NULL);
3968
3969         /*
3970          * Keep compiler silent.
3971          */
3972         UNUSED(name);
3973
3974         lock = &(search->rbtdb->node_locks[node->locknum].lock);
3975         locktype = isc_rwlocktype_read;
3976         NODE_LOCK(lock, locktype);
3977
3978         /*
3979          * Look for a DNAME or RRSIG DNAME rdataset.
3980          */
3981         dname_header = NULL;
3982         sigdname_header = NULL;
3983         header_prev = NULL;
3984         for (header = node->data; header != NULL; header = header_next) {
3985                 header_next = header->next;
3986                 if (header->rdh_ttl <= search->now) {
3987                         /*
3988                          * This rdataset is stale.  If no one else is
3989                          * using the node, we can clean it up right
3990                          * now, otherwise we mark it as stale, and
3991                          * the node as dirty, so it will get cleaned
3992                          * up later.
3993                          */
3994                         if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
3995                             (locktype == isc_rwlocktype_write ||
3996                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3997                                 /*
3998                                  * We update the node's status only when we
3999                                  * can get write access; otherwise, we leave
4000                                  * others to this work.  Periodical cleaning
4001                                  * will eventually take the job as the last
4002                                  * resort.
4003                                  * We won't downgrade the lock, since other
4004                                  * rdatasets are probably stale, too.
4005                                  */
4006                                 locktype = isc_rwlocktype_write;
4007
4008                                 if (dns_rbtnode_refcurrent(node) == 0) {
4009                                         isc_mem_t *mctx;
4010
4011                                         /*
4012                                          * header->down can be non-NULL if the
4013                                          * refcount has just decremented to 0
4014                                          * but decrement_reference() has not
4015                                          * performed clean_cache_node(), in
4016                                          * which case we need to purge the
4017                                          * stale headers first.
4018                                          */
4019                                         mctx = search->rbtdb->common.mctx;
4020                                         clean_stale_headers(search->rbtdb,
4021                                                             mctx,
4022                                                             header);
4023                                         if (header_prev != NULL)
4024                                                 header_prev->next =
4025                                                         header->next;
4026                                         else
4027                                                 node->data = header->next;
4028                                         free_rdataset(search->rbtdb, mctx,
4029                                                       header);
4030                                 } else {
4031                                         header->attributes |=
4032                                                 RDATASET_ATTR_STALE;
4033                                         node->dirty = 1;
4034                                         header_prev = header;
4035                                 }
4036                         } else
4037                                 header_prev = header;
4038                 } else if (header->type == dns_rdatatype_dname &&
4039                            EXISTS(header)) {
4040                         dname_header = header;
4041                         header_prev = header;
4042                 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
4043                          EXISTS(header)) {
4044                         sigdname_header = header;
4045                         header_prev = header;
4046                 } else
4047                         header_prev = header;
4048         }
4049
4050         if (dname_header != NULL &&
4051             (!DNS_TRUST_PENDING(dname_header->trust) ||
4052              (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4053                 /*
4054                  * We increment the reference count on node to ensure that
4055                  * search->zonecut_rdataset will still be valid later.
4056                  */
4057                 new_reference(search->rbtdb, node);
4058                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4059                 search->zonecut = node;
4060                 search->zonecut_rdataset = dname_header;
4061                 search->zonecut_sigrdataset = sigdname_header;
4062                 search->need_cleanup = ISC_TRUE;
4063                 result = DNS_R_PARTIALMATCH;
4064         } else
4065                 result = DNS_R_CONTINUE;
4066
4067         NODE_UNLOCK(lock, locktype);
4068
4069         return (result);
4070 }
4071
4072 static inline isc_result_t
4073 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4074                      dns_dbnode_t **nodep, dns_name_t *foundname,
4075                      dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4076 {
4077         unsigned int i;
4078         dns_rbtnode_t *level_node;
4079         rdatasetheader_t *header, *header_prev, *header_next;
4080         rdatasetheader_t *found, *foundsig;
4081         isc_result_t result = ISC_R_NOTFOUND;
4082         dns_name_t name;
4083         dns_rbtdb_t *rbtdb;
4084         isc_boolean_t done;
4085         nodelock_t *lock;
4086         isc_rwlocktype_t locktype;
4087
4088         /*
4089          * Caller must be holding the tree lock.
4090          */
4091
4092         rbtdb = search->rbtdb;
4093         i = search->chain.level_matches;
4094         done = ISC_FALSE;
4095         do {
4096                 locktype = isc_rwlocktype_read;
4097                 lock = &rbtdb->node_locks[node->locknum].lock;
4098                 NODE_LOCK(lock, locktype);
4099
4100                 /*
4101                  * Look for NS and RRSIG NS rdatasets.
4102                  */
4103                 found = NULL;
4104                 foundsig = NULL;
4105                 header_prev = NULL;
4106                 for (header = node->data;
4107                      header != NULL;
4108                      header = header_next) {
4109                         header_next = header->next;
4110                         if (header->rdh_ttl <= search->now) {
4111                                 /*
4112                                  * This rdataset is stale.  If no one else is
4113                                  * using the node, we can clean it up right
4114                                  * now, otherwise we mark it as stale, and
4115                                  * the node as dirty, so it will get cleaned
4116                                  * up later.
4117                                  */
4118                                 if ((header->rdh_ttl <= search->now -
4119                                                     RBTDB_VIRTUAL) &&
4120                                     (locktype == isc_rwlocktype_write ||
4121                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4122                                         /*
4123                                          * We update the node's status only
4124                                          * when we can get write access.
4125                                          */
4126                                         locktype = isc_rwlocktype_write;
4127
4128                                         if (dns_rbtnode_refcurrent(node)
4129                                             == 0) {
4130                                                 isc_mem_t *m;
4131
4132                                                 m = search->rbtdb->common.mctx;
4133                                                 clean_stale_headers(
4134                                                         search->rbtdb,
4135                                                         m, header);
4136                                                 if (header_prev != NULL)
4137                                                         header_prev->next =
4138                                                                 header->next;
4139                                                 else
4140                                                         node->data =
4141                                                                 header->next;
4142                                                 free_rdataset(rbtdb, m,
4143                                                               header);
4144                                         } else {
4145                                                 header->attributes |=
4146                                                         RDATASET_ATTR_STALE;
4147                                                 node->dirty = 1;
4148                                                 header_prev = header;
4149                                         }
4150                                 } else
4151                                         header_prev = header;
4152                         } else if (EXISTS(header)) {
4153                                 /*
4154                                  * We've found an extant rdataset.  See if
4155                                  * we're interested in it.
4156                                  */
4157                                 if (header->type == dns_rdatatype_ns) {
4158                                         found = header;
4159                                         if (foundsig != NULL)
4160                                                 break;
4161                                 } else if (header->type ==
4162                                            RBTDB_RDATATYPE_SIGNS) {
4163                                         foundsig = header;
4164                                         if (found != NULL)
4165                                                 break;
4166                                 }
4167                                 header_prev = header;
4168                         } else
4169                                 header_prev = header;
4170                 }
4171
4172                 if (found != NULL) {
4173                         /*
4174                          * If we have to set foundname, we do it before
4175                          * anything else.  If we were to set foundname after
4176                          * we had set nodep or bound the rdataset, then we'd
4177                          * have to undo that work if dns_name_concatenate()
4178                          * failed.  By setting foundname first, there's
4179                          * nothing to undo if we have trouble.
4180                          */
4181                         if (foundname != NULL) {
4182                                 dns_name_init(&name, NULL);
4183                                 dns_rbt_namefromnode(node, &name);
4184                                 result = dns_name_copy(&name, foundname, NULL);
4185                                 while (result == ISC_R_SUCCESS && i > 0) {
4186                                         i--;
4187                                         level_node = search->chain.levels[i];
4188                                         dns_name_init(&name, NULL);
4189                                         dns_rbt_namefromnode(level_node,
4190                                                              &name);
4191                                         result =
4192                                                 dns_name_concatenate(foundname,
4193                                                                      &name,
4194                                                                      foundname,
4195                                                                      NULL);
4196                                 }
4197                                 if (result != ISC_R_SUCCESS) {
4198                                         *nodep = NULL;
4199                                         goto node_exit;
4200                                 }
4201                         }
4202                         result = DNS_R_DELEGATION;
4203                         if (nodep != NULL) {
4204                                 new_reference(search->rbtdb, node);
4205                                 *nodep = node;
4206                         }
4207                         bind_rdataset(search->rbtdb, node, found, search->now,
4208                                       rdataset);
4209                         if (foundsig != NULL)
4210                                 bind_rdataset(search->rbtdb, node, foundsig,
4211                                               search->now, sigrdataset);
4212                         if (need_headerupdate(found, search->now) ||
4213                             (foundsig != NULL &&
4214                              need_headerupdate(foundsig, search->now))) {
4215                                 if (locktype != isc_rwlocktype_write) {
4216                                         NODE_UNLOCK(lock, locktype);
4217                                         NODE_LOCK(lock, isc_rwlocktype_write);
4218                                         locktype = isc_rwlocktype_write;
4219                                 }
4220                                 if (need_headerupdate(found, search->now))
4221                                         update_header(search->rbtdb, found,
4222                                                       search->now);
4223                                 if (foundsig != NULL &&
4224                                     need_headerupdate(foundsig, search->now)) {
4225                                         update_header(search->rbtdb, foundsig,
4226                                                       search->now);
4227                                 }
4228                         }
4229                 }
4230
4231         node_exit:
4232                 NODE_UNLOCK(lock, locktype);
4233
4234                 if (found == NULL && i > 0) {
4235                         i--;
4236                         node = search->chain.levels[i];
4237                 } else
4238                         done = ISC_TRUE;
4239
4240         } while (!done);
4241
4242         return (result);
4243 }
4244
4245 static isc_result_t
4246 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4247                   isc_stdtime_t now, dns_name_t *foundname,
4248                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4249 {
4250         dns_rbtnode_t *node;
4251         rdatasetheader_t *header, *header_next, *header_prev;
4252         rdatasetheader_t *found, *foundsig;
4253         isc_boolean_t empty_node;
4254         isc_result_t result;
4255         dns_fixedname_t fname, forigin;
4256         dns_name_t *name, *origin;
4257         rbtdb_rdatatype_t matchtype, sigmatchtype;
4258         nodelock_t *lock;
4259         isc_rwlocktype_t locktype;
4260
4261         matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4262         sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4263                                              dns_rdatatype_nsec);
4264
4265         do {
4266                 node = NULL;
4267                 dns_fixedname_init(&fname);
4268                 name = dns_fixedname_name(&fname);
4269                 dns_fixedname_init(&forigin);
4270                 origin = dns_fixedname_name(&forigin);
4271                 result = dns_rbtnodechain_current(&search->chain, name,
4272                                                   origin, &node);
4273                 if (result != ISC_R_SUCCESS)
4274                         return (result);
4275                 locktype = isc_rwlocktype_read;
4276                 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4277                 NODE_LOCK(lock, locktype);
4278                 found = NULL;
4279                 foundsig = NULL;
4280                 empty_node = ISC_TRUE;
4281                 header_prev = NULL;
4282                 for (header = node->data;
4283                      header != NULL;
4284                      header = header_next) {
4285                         header_next = header->next;
4286                         if (header->rdh_ttl <= now) {
4287                                 /*
4288                                  * This rdataset is stale.  If no one else is
4289                                  * using the node, we can clean it up right
4290                                  * now, otherwise we mark it as stale, and the
4291                                  * node as dirty, so it will get cleaned up
4292                                  * later.
4293                                  */
4294                                 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4295                                     (locktype == isc_rwlocktype_write ||
4296                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4297                                         /*
4298                                          * We update the node's status only
4299                                          * when we can get write access.
4300                                          */
4301                                         locktype = isc_rwlocktype_write;
4302
4303                                         if (dns_rbtnode_refcurrent(node)
4304                                             == 0) {
4305                                                 isc_mem_t *m;
4306
4307                                                 m = search->rbtdb->common.mctx;
4308                                                 clean_stale_headers(
4309                                                         search->rbtdb,
4310                                                         m, header);
4311                                                 if (header_prev != NULL)
4312                                                         header_prev->next =
4313                                                                 header->next;
4314                                                 else
4315                                                         node->data = header->next;
4316                                                 free_rdataset(search->rbtdb, m,
4317                                                               header);
4318                                         } else {
4319                                                 header->attributes |=
4320                                                         RDATASET_ATTR_STALE;
4321                                                 node->dirty = 1;
4322                                                 header_prev = header;
4323                                         }
4324                                 } else
4325                                         header_prev = header;
4326                                 continue;
4327                         }
4328                         if (NONEXISTENT(header) ||
4329                             RBTDB_RDATATYPE_BASE(header->type) == 0) {
4330                                 header_prev = header;
4331                                 continue;
4332                         }
4333                         empty_node = ISC_FALSE;
4334                         if (header->type == matchtype)
4335                                 found = header;
4336                         else if (header->type == sigmatchtype)
4337                                 foundsig = header;
4338                         header_prev = header;
4339                 }
4340                 if (found != NULL) {
4341                         result = dns_name_concatenate(name, origin,
4342                                                       foundname, NULL);
4343                         if (result != ISC_R_SUCCESS)
4344                                 goto unlock_node;
4345                         bind_rdataset(search->rbtdb, node, found,
4346                                       now, rdataset);
4347                         if (foundsig != NULL)
4348                                 bind_rdataset(search->rbtdb, node, foundsig,
4349                                               now, sigrdataset);
4350                         new_reference(search->rbtdb, node);
4351                         *nodep = node;
4352                         result = DNS_R_COVERINGNSEC;
4353                 } else if (!empty_node) {
4354                         result = ISC_R_NOTFOUND;
4355                 } else
4356                         result = dns_rbtnodechain_prev(&search->chain, NULL,
4357                                                        NULL);
4358  unlock_node:
4359                 NODE_UNLOCK(lock, locktype);
4360         } while (empty_node && result == ISC_R_SUCCESS);
4361         return (result);
4362 }
4363
4364 static isc_result_t
4365 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4366            dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4367            dns_dbnode_t **nodep, dns_name_t *foundname,
4368            dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4369 {
4370         dns_rbtnode_t *node = NULL;
4371         isc_result_t result;
4372         rbtdb_search_t search;
4373         isc_boolean_t cname_ok = ISC_TRUE;
4374         isc_boolean_t empty_node;
4375         nodelock_t *lock;
4376         isc_rwlocktype_t locktype;
4377         rdatasetheader_t *header, *header_prev, *header_next;
4378         rdatasetheader_t *found, *nsheader;
4379         rdatasetheader_t *foundsig, *nssig, *cnamesig;
4380         rdatasetheader_t *update, *updatesig;
4381         rbtdb_rdatatype_t sigtype, negtype;
4382
4383         UNUSED(version);
4384
4385         search.rbtdb = (dns_rbtdb_t *)db;
4386
4387         REQUIRE(VALID_RBTDB(search.rbtdb));
4388         REQUIRE(version == NULL);
4389
4390         if (now == 0)
4391                 isc_stdtime_get(&now);
4392
4393         search.rbtversion = NULL;
4394         search.serial = 1;
4395         search.options = options;
4396         search.copy_name = ISC_FALSE;
4397         search.need_cleanup = ISC_FALSE;
4398         search.wild = ISC_FALSE;
4399         search.zonecut = NULL;
4400         dns_fixedname_init(&search.zonecut_name);
4401         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4402         search.now = now;
4403         update = NULL;
4404         updatesig = NULL;
4405
4406         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4407
4408         /*
4409          * Search down from the root of the tree.  If, while going down, we
4410          * encounter a callback node, cache_zonecut_callback() will search the
4411          * rdatasets at the zone cut for a DNAME rdataset.
4412          */
4413         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4414                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
4415                                   cache_zonecut_callback, &search);
4416
4417         if (result == DNS_R_PARTIALMATCH) {
4418                 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4419                         result = find_coveringnsec(&search, nodep, now,
4420                                                    foundname, rdataset,
4421                                                    sigrdataset);
4422                         if (result == DNS_R_COVERINGNSEC)
4423                                 goto tree_exit;
4424                 }
4425                 if (search.zonecut != NULL) {
4426                     result = setup_delegation(&search, nodep, foundname,
4427                                               rdataset, sigrdataset);
4428                     goto tree_exit;
4429                 } else {
4430                 find_ns:
4431                         result = find_deepest_zonecut(&search, node, nodep,
4432                                                       foundname, rdataset,
4433                                                       sigrdataset);
4434                         goto tree_exit;
4435                 }
4436         } else if (result != ISC_R_SUCCESS)
4437                 goto tree_exit;
4438
4439         /*
4440          * Certain DNSSEC types are not subject to CNAME matching
4441          * (RFC4035, section 2.5 and RFC3007).
4442          *
4443          * We don't check for RRSIG, because we don't store RRSIG records
4444          * directly.
4445          */
4446         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4447                 cname_ok = ISC_FALSE;
4448
4449         /*
4450          * We now go looking for rdata...
4451          */
4452
4453         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4454         locktype = isc_rwlocktype_read;
4455         NODE_LOCK(lock, locktype);
4456
4457         found = NULL;
4458         foundsig = NULL;
4459         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4460         negtype = RBTDB_RDATATYPE_VALUE(0, type);
4461         nsheader = NULL;
4462         nssig = NULL;
4463         cnamesig = NULL;
4464         empty_node = ISC_TRUE;
4465         header_prev = NULL;
4466         for (header = node->data; header != NULL; header = header_next) {
4467                 header_next = header->next;
4468                 if (header->rdh_ttl <= now) {
4469                         /*
4470                          * This rdataset is stale.  If no one else is using the
4471                          * node, we can clean it up right now, otherwise we
4472                          * mark it as stale, and the node as dirty, so it will
4473                          * get cleaned up later.
4474                          */
4475                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4476                             (locktype == isc_rwlocktype_write ||
4477                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4478                                 /*
4479                                  * We update the node's status only when we
4480                                  * can get write access.
4481                                  */
4482                                 locktype = isc_rwlocktype_write;
4483
4484                                 if (dns_rbtnode_refcurrent(node) == 0) {
4485                                         isc_mem_t *mctx;
4486
4487                                         mctx = search.rbtdb->common.mctx;
4488                                         clean_stale_headers(search.rbtdb, mctx,
4489                                                             header);
4490                                         if (header_prev != NULL)
4491                                                 header_prev->next =
4492                                                         header->next;
4493                                         else
4494                                                 node->data = header->next;
4495                                         free_rdataset(search.rbtdb, mctx,
4496                                                       header);
4497                                 } else {
4498                                         header->attributes |=
4499                                                 RDATASET_ATTR_STALE;
4500                                         node->dirty = 1;
4501                                         header_prev = header;
4502                                 }
4503                         } else
4504                                 header_prev = header;
4505                 } else if (EXISTS(header)) {
4506                         /*
4507                          * We now know that there is at least one active
4508                          * non-stale rdataset at this node.
4509                          */
4510                         empty_node = ISC_FALSE;
4511
4512                         /*
4513                          * If we found a type we were looking for, remember
4514                          * it.
4515                          */
4516                         if (header->type == type ||
4517                             (type == dns_rdatatype_any &&
4518                              RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4519                             (cname_ok && header->type ==
4520                              dns_rdatatype_cname)) {
4521                                 /*
4522                                  * We've found the answer.
4523                                  */
4524                                 found = header;
4525                                 if (header->type == dns_rdatatype_cname &&
4526                                     cname_ok &&
4527                                     cnamesig != NULL) {
4528                                         /*
4529                                          * If we've already got the CNAME RRSIG,
4530                                          * use it, otherwise change sigtype
4531                                          * so that we find it.
4532                                          */
4533                                         if (cnamesig != NULL)
4534                                                 foundsig = cnamesig;
4535                                         else
4536                                                 sigtype =
4537                                                     RBTDB_RDATATYPE_SIGCNAME;
4538                                         foundsig = cnamesig;
4539                                 }
4540                         } else if (header->type == sigtype) {
4541                                 /*
4542                                  * We've found the RRSIG rdataset for our
4543                                  * target type.  Remember it.
4544                                  */
4545                                 foundsig = header;
4546                         } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4547                                    header->type == negtype) {
4548                                 /*
4549                                  * We've found a negative cache entry.
4550                                  */
4551                                 found = header;
4552                         } else if (header->type == dns_rdatatype_ns) {
4553                                 /*
4554                                  * Remember a NS rdataset even if we're
4555                                  * not specifically looking for it, because
4556                                  * we might need it later.
4557                                  */
4558                                 nsheader = header;
4559                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4560                                 /*
4561                                  * If we need the NS rdataset, we'll also
4562                                  * need its signature.
4563                                  */
4564                                 nssig = header;
4565                         } else if (cname_ok &&
4566                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
4567                                 /*
4568                                  * If we get a CNAME match, we'll also need
4569                                  * its signature.
4570                                  */
4571                                 cnamesig = header;
4572                         }
4573                         header_prev = header;
4574                 } else
4575                         header_prev = header;
4576         }
4577
4578         if (empty_node) {
4579                 /*
4580                  * We have an exact match for the name, but there are no
4581                  * extant rdatasets.  That means that this node doesn't
4582                  * meaningfully exist, and that we really have a partial match.
4583                  */
4584                 NODE_UNLOCK(lock, locktype);
4585                 goto find_ns;
4586         }
4587
4588         /*
4589          * If we didn't find what we were looking for...
4590          */
4591         if (found == NULL ||
4592             (DNS_TRUST_ADDITIONAL(found->trust) &&
4593              ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4594             (found->trust == dns_trust_glue &&
4595              ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4596             (DNS_TRUST_PENDING(found->trust) &&
4597              ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4598                 /*
4599                  * If there is an NS rdataset at this node, then this is the
4600                  * deepest zone cut.
4601                  */
4602                 if (nsheader != NULL) {
4603                         if (nodep != NULL) {
4604                                 new_reference(search.rbtdb, node);
4605                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4606                                 *nodep = node;
4607                         }
4608                         bind_rdataset(search.rbtdb, node, nsheader, search.now,
4609                                       rdataset);
4610                         if (need_headerupdate(nsheader, search.now))
4611                                 update = nsheader;
4612                         if (nssig != NULL) {
4613                                 bind_rdataset(search.rbtdb, node, nssig,
4614                                               search.now, sigrdataset);
4615                                 if (need_headerupdate(nssig, search.now))
4616                                         updatesig = nssig;
4617                         }
4618                         result = DNS_R_DELEGATION;
4619                         goto node_exit;
4620                 }
4621
4622                 /*
4623                  * Go find the deepest zone cut.
4624                  */
4625                 NODE_UNLOCK(lock, locktype);
4626                 goto find_ns;
4627         }
4628
4629         /*
4630          * We found what we were looking for, or we found a CNAME.
4631          */
4632
4633         if (nodep != NULL) {
4634                 new_reference(search.rbtdb, node);
4635                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4636                 *nodep = node;
4637         }
4638
4639         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
4640                 /*
4641                  * We found a negative cache entry.
4642                  */
4643                 if (NXDOMAIN(found))
4644                         result = DNS_R_NCACHENXDOMAIN;
4645                 else
4646                         result = DNS_R_NCACHENXRRSET;
4647         } else if (type != found->type &&
4648                    type != dns_rdatatype_any &&
4649                    found->type == dns_rdatatype_cname) {
4650                 /*
4651                  * We weren't doing an ANY query and we found a CNAME instead
4652                  * of the type we were looking for, so we need to indicate
4653                  * that result to the caller.
4654                  */
4655                 result = DNS_R_CNAME;
4656         } else {
4657                 /*
4658                  * An ordinary successful query!
4659                  */
4660                 result = ISC_R_SUCCESS;
4661         }
4662
4663         if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
4664             result == DNS_R_NCACHENXRRSET) {
4665                 bind_rdataset(search.rbtdb, node, found, search.now,
4666                               rdataset);
4667                 if (need_headerupdate(found, search.now))
4668                         update = found;
4669                 if (foundsig != NULL) {
4670                         bind_rdataset(search.rbtdb, node, foundsig, search.now,
4671                                       sigrdataset);
4672                         if (need_headerupdate(foundsig, search.now))
4673                                 updatesig = foundsig;
4674                 }
4675         }
4676
4677  node_exit:
4678         if ((update != NULL || updatesig != NULL) &&
4679             locktype != isc_rwlocktype_write) {
4680                 NODE_UNLOCK(lock, locktype);
4681                 NODE_LOCK(lock, isc_rwlocktype_write);
4682                 locktype = isc_rwlocktype_write;
4683         }
4684         if (update != NULL && need_headerupdate(update, search.now))
4685                 update_header(search.rbtdb, update, search.now);
4686         if (updatesig != NULL && need_headerupdate(updatesig, search.now))
4687                 update_header(search.rbtdb, updatesig, search.now);
4688
4689         NODE_UNLOCK(lock, locktype);
4690
4691  tree_exit:
4692         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4693
4694         /*
4695          * If we found a zonecut but aren't going to use it, we have to
4696          * let go of it.
4697          */
4698         if (search.need_cleanup) {
4699                 node = search.zonecut;
4700                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4701
4702                 NODE_LOCK(lock, isc_rwlocktype_read);
4703                 decrement_reference(search.rbtdb, node, 0,
4704                                     isc_rwlocktype_read, isc_rwlocktype_none,
4705                                     ISC_FALSE);
4706                 NODE_UNLOCK(lock, isc_rwlocktype_read);
4707         }
4708
4709         dns_rbtnodechain_reset(&search.chain);
4710
4711         return (result);
4712 }
4713
4714 static isc_result_t
4715 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4716                   isc_stdtime_t now, dns_dbnode_t **nodep,
4717                   dns_name_t *foundname,
4718                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4719 {
4720         dns_rbtnode_t *node = NULL;
4721         nodelock_t *lock;
4722         isc_result_t result;
4723         rbtdb_search_t search;
4724         rdatasetheader_t *header, *header_prev, *header_next;
4725         rdatasetheader_t *found, *foundsig;
4726         unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
4727         isc_rwlocktype_t locktype;
4728
4729         search.rbtdb = (dns_rbtdb_t *)db;
4730
4731         REQUIRE(VALID_RBTDB(search.rbtdb));
4732
4733         if (now == 0)
4734                 isc_stdtime_get(&now);
4735
4736         search.rbtversion = NULL;
4737         search.serial = 1;
4738         search.options = options;
4739         search.copy_name = ISC_FALSE;
4740         search.need_cleanup = ISC_FALSE;
4741         search.wild = ISC_FALSE;
4742         search.zonecut = NULL;
4743         dns_fixedname_init(&search.zonecut_name);
4744         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4745         search.now = now;
4746
4747         if ((options & DNS_DBFIND_NOEXACT) != 0)
4748                 rbtoptions |= DNS_RBTFIND_NOEXACT;
4749
4750         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4751
4752         /*
4753          * Search down from the root of the tree.
4754          */
4755         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4756                                   &search.chain, rbtoptions, NULL, &search);
4757
4758         if (result == DNS_R_PARTIALMATCH) {
4759         find_ns:
4760                 result = find_deepest_zonecut(&search, node, nodep, foundname,
4761                                               rdataset, sigrdataset);
4762                 goto tree_exit;
4763         } else if (result != ISC_R_SUCCESS)
4764                 goto tree_exit;
4765
4766         /*
4767          * We now go looking for an NS rdataset at the node.
4768          */
4769
4770         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4771         locktype = isc_rwlocktype_read;
4772         NODE_LOCK(lock, locktype);
4773
4774         found = NULL;
4775         foundsig = NULL;
4776         header_prev = NULL;
4777         for (header = node->data; header != NULL; header = header_next) {
4778                 header_next = header->next;
4779                 if (header->rdh_ttl <= now) {
4780                         /*
4781                          * This rdataset is stale.  If no one else is using the
4782                          * node, we can clean it up right now, otherwise we
4783                          * mark it as stale, and the node as dirty, so it will
4784                          * get cleaned up later.
4785                          */
4786                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4787                             (locktype == isc_rwlocktype_write ||
4788                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4789                                 /*
4790                                  * We update the node's status only when we
4791                                  * can get write access.
4792                                  */
4793                                 locktype = isc_rwlocktype_write;
4794
4795                                 if (dns_rbtnode_refcurrent(node) == 0) {
4796                                         isc_mem_t *mctx;
4797
4798                                         mctx = search.rbtdb->common.mctx;
4799                                         clean_stale_headers(search.rbtdb, mctx,
4800                                                             header);
4801                                         if (header_prev != NULL)
4802                                                 header_prev->next =
4803                                                         header->next;
4804                                         else
4805                                                 node->data = header->next;
4806                                         free_rdataset(search.rbtdb, mctx,
4807                                                       header);
4808                                 } else {
4809                                         header->attributes |=
4810                                                 RDATASET_ATTR_STALE;
4811                                         node->dirty = 1;
4812                                         header_prev = header;
4813                                 }
4814                         } else
4815                                 header_prev = header;
4816                 } else if (EXISTS(header)) {
4817                         /*
4818                          * If we found a type we were looking for, remember
4819                          * it.
4820                          */
4821                         if (header->type == dns_rdatatype_ns) {
4822                                 /*
4823                                  * Remember a NS rdataset even if we're
4824                                  * not specifically looking for it, because
4825                                  * we might need it later.
4826                                  */
4827                                 found = header;
4828                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4829                                 /*
4830                                  * If we need the NS rdataset, we'll also
4831                                  * need its signature.
4832                                  */
4833                                 foundsig = header;
4834                         }
4835                         header_prev = header;
4836                 } else
4837                         header_prev = header;
4838         }
4839
4840         if (found == NULL) {
4841                 /*
4842                  * No NS records here.
4843                  */
4844                 NODE_UNLOCK(lock, locktype);
4845                 goto find_ns;
4846         }
4847
4848         if (nodep != NULL) {
4849                 new_reference(search.rbtdb, node);
4850                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4851                 *nodep = node;
4852         }
4853
4854         bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
4855         if (foundsig != NULL)
4856                 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4857                               sigrdataset);
4858
4859         if (need_headerupdate(found, search.now) ||
4860             (foundsig != NULL &&  need_headerupdate(foundsig, search.now))) {
4861                 if (locktype != isc_rwlocktype_write) {
4862                         NODE_UNLOCK(lock, locktype);
4863                         NODE_LOCK(lock, isc_rwlocktype_write);
4864                         locktype = isc_rwlocktype_write;
4865                 }
4866                 if (need_headerupdate(found, search.now))
4867                         update_header(search.rbtdb, found, search.now);
4868                 if (foundsig != NULL &&
4869                     need_headerupdate(foundsig, search.now)) {
4870                         update_header(search.rbtdb, foundsig, search.now);
4871                 }
4872         }
4873
4874         NODE_UNLOCK(lock, locktype);
4875
4876  tree_exit:
4877         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4878
4879         INSIST(!search.need_cleanup);
4880
4881         dns_rbtnodechain_reset(&search.chain);
4882
4883         if (result == DNS_R_DELEGATION)
4884                 result = ISC_R_SUCCESS;
4885
4886         return (result);
4887 }
4888
4889 static void
4890 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
4891         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4892         dns_rbtnode_t *node = (dns_rbtnode_t *)source;
4893         unsigned int refs;
4894
4895         REQUIRE(VALID_RBTDB(rbtdb));
4896         REQUIRE(targetp != NULL && *targetp == NULL);
4897
4898         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
4899         dns_rbtnode_refincrement(node, &refs);
4900         INSIST(refs != 0);
4901         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
4902
4903         *targetp = source;
4904 }
4905
4906 static void
4907 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
4908         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4909         dns_rbtnode_t *node;
4910         isc_boolean_t want_free = ISC_FALSE;
4911         isc_boolean_t inactive = ISC_FALSE;
4912         rbtdb_nodelock_t *nodelock;
4913
4914         REQUIRE(VALID_RBTDB(rbtdb));
4915         REQUIRE(targetp != NULL && *targetp != NULL);
4916
4917         node = (dns_rbtnode_t *)(*targetp);
4918         nodelock = &rbtdb->node_locks[node->locknum];
4919
4920         NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
4921
4922         if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
4923                                 isc_rwlocktype_none, ISC_FALSE)) {
4924                 if (isc_refcount_current(&nodelock->references) == 0 &&
4925                     nodelock->exiting) {
4926                         inactive = ISC_TRUE;
4927                 }
4928         }
4929
4930         NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
4931
4932         *targetp = NULL;
4933
4934         if (inactive) {
4935                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
4936                 rbtdb->active--;
4937                 if (rbtdb->active == 0)
4938                         want_free = ISC_TRUE;
4939                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
4940                 if (want_free) {
4941                         char buf[DNS_NAME_FORMATSIZE];
4942                         if (dns_name_dynamic(&rbtdb->common.origin))
4943                                 dns_name_format(&rbtdb->common.origin, buf,
4944                                                 sizeof(buf));
4945                         else
4946                                 strcpy(buf, "<UNKNOWN>");
4947                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
4948                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
4949                                       "calling free_rbtdb(%s)", buf);
4950                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
4951                 }
4952         }
4953 }
4954
4955 static isc_result_t
4956 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
4957         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4958         dns_rbtnode_t *rbtnode = node;
4959         rdatasetheader_t *header;
4960         isc_boolean_t force_expire = ISC_FALSE;
4961         /*
4962          * These are the category and module used by the cache cleaner.
4963          */
4964         isc_boolean_t log = ISC_FALSE;
4965         isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
4966         isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
4967         int level = ISC_LOG_DEBUG(2);
4968         char printname[DNS_NAME_FORMATSIZE];
4969
4970         REQUIRE(VALID_RBTDB(rbtdb));
4971
4972         /*
4973          * Caller must hold a tree lock.
4974          */
4975
4976         if (now == 0)
4977                 isc_stdtime_get(&now);
4978
4979         if (isc_mem_isovermem(rbtdb->common.mctx)) {
4980                 isc_uint32_t val;
4981
4982                 isc_random_get(&val);
4983                 /*
4984                  * XXXDCL Could stand to have a better policy, like LRU.
4985                  */
4986                 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
4987
4988                 /*
4989                  * Note that 'log' can be true IFF overmem is also true.
4990                  * overmem can currently only be true for cache
4991                  * databases -- hence all of the "overmem cache" log strings.
4992                  */
4993                 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
4994                 if (log)
4995                         isc_log_write(dns_lctx, category, module, level,
4996                                       "overmem cache: %s %s",
4997                                       force_expire ? "FORCE" : "check",
4998                                       dns_rbt_formatnodename(rbtnode,
4999                                                            printname,
5000                                                            sizeof(printname)));
5001         }
5002
5003         /*
5004          * We may not need write access, but this code path is not performance
5005          * sensitive, so it should be okay to always lock as a writer.
5006          */
5007         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5008                   isc_rwlocktype_write);
5009
5010         for (header = rbtnode->data; header != NULL; header = header->next)
5011                 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
5012                         /*
5013                          * We don't check if refcurrent(rbtnode) == 0 and try
5014                          * to free like we do in cache_find(), because
5015                          * refcurrent(rbtnode) must be non-zero.  This is so
5016                          * because 'node' is an argument to the function.
5017                          */
5018                         header->attributes |= RDATASET_ATTR_STALE;
5019                         rbtnode->dirty = 1;
5020                         if (log)
5021                                 isc_log_write(dns_lctx, category, module,
5022                                               level, "overmem cache: stale %s",
5023                                               printname);
5024                 } else if (force_expire) {
5025                         if (! RETAIN(header)) {
5026                                 set_ttl(rbtdb, header, 0);
5027                                 header->attributes |= RDATASET_ATTR_STALE;
5028                                 rbtnode->dirty = 1;
5029                         } else if (log) {
5030                                 isc_log_write(dns_lctx, category, module,
5031                                               level, "overmem cache: "
5032                                               "reprieve by RETAIN() %s",
5033                                               printname);
5034                         }
5035                 } else if (isc_mem_isovermem(rbtdb->common.mctx) && log)
5036                         isc_log_write(dns_lctx, category, module, level,
5037                                       "overmem cache: saved %s", printname);
5038
5039         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5040                     isc_rwlocktype_write);
5041
5042         return (ISC_R_SUCCESS);
5043 }
5044
5045 static void
5046 overmem(dns_db_t *db, isc_boolean_t overmem) {
5047         /* This is an empty callback.  See adb.c:water() */
5048
5049         UNUSED(db);
5050         UNUSED(overmem);
5051
5052         return;
5053 }
5054
5055 static void
5056 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5057         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5058         dns_rbtnode_t *rbtnode = node;
5059         isc_boolean_t first;
5060
5061         REQUIRE(VALID_RBTDB(rbtdb));
5062
5063         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5064                   isc_rwlocktype_read);
5065
5066         fprintf(out, "node %p, %u references, locknum = %u\n",
5067                 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5068                 rbtnode->locknum);
5069         if (rbtnode->data != NULL) {
5070                 rdatasetheader_t *current, *top_next;
5071
5072                 for (current = rbtnode->data; current != NULL;
5073                      current = top_next) {
5074                         top_next = current->next;
5075                         first = ISC_TRUE;
5076                         fprintf(out, "\ttype %u", current->type);
5077                         do {
5078                                 if (!first)
5079                                         fprintf(out, "\t");
5080                                 first = ISC_FALSE;
5081                                 fprintf(out,
5082                                         "\tserial = %lu, ttl = %u, "
5083                                         "trust = %u, attributes = %u, "
5084                                         "resign = %u\n",
5085                                         (unsigned long)current->serial,
5086                                         current->rdh_ttl,
5087                                         current->trust,
5088                                         current->attributes,
5089                                         current->resign);
5090                                 current = current->down;
5091                         } while (current != NULL);
5092                 }
5093         } else
5094                 fprintf(out, "(empty)\n");
5095
5096         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5097                     isc_rwlocktype_read);
5098 }
5099
5100 static isc_result_t
5101 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5102 {
5103         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5104         rbtdb_dbiterator_t *rbtdbiter;
5105
5106         REQUIRE(VALID_RBTDB(rbtdb));
5107
5108         rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5109         if (rbtdbiter == NULL)
5110                 return (ISC_R_NOMEMORY);
5111
5112         rbtdbiter->common.methods = &dbiterator_methods;
5113         rbtdbiter->common.db = NULL;
5114         dns_db_attach(db, &rbtdbiter->common.db);
5115         rbtdbiter->common.relative_names =
5116                         ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5117         rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5118         rbtdbiter->common.cleaning = ISC_FALSE;
5119         rbtdbiter->paused = ISC_TRUE;
5120         rbtdbiter->tree_locked = isc_rwlocktype_none;
5121         rbtdbiter->result = ISC_R_SUCCESS;
5122         dns_fixedname_init(&rbtdbiter->name);
5123         dns_fixedname_init(&rbtdbiter->origin);
5124         rbtdbiter->node = NULL;
5125         rbtdbiter->delete = 0;
5126         rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5127         rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5128         memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5129         dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5130         dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5131         if (rbtdbiter->nsec3only)
5132                 rbtdbiter->current = &rbtdbiter->nsec3chain;
5133         else
5134                 rbtdbiter->current = &rbtdbiter->chain;
5135
5136         *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5137
5138         return (ISC_R_SUCCESS);
5139 }
5140
5141 static isc_result_t
5142 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5143                   dns_rdatatype_t type, dns_rdatatype_t covers,
5144                   isc_stdtime_t now, dns_rdataset_t *rdataset,
5145                   dns_rdataset_t *sigrdataset)
5146 {
5147         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5148         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5149         rdatasetheader_t *header, *header_next, *found, *foundsig;
5150         rbtdb_serial_t serial;
5151         rbtdb_version_t *rbtversion = version;
5152         isc_boolean_t close_version = ISC_FALSE;
5153         rbtdb_rdatatype_t matchtype, sigmatchtype;
5154
5155         REQUIRE(VALID_RBTDB(rbtdb));
5156         REQUIRE(type != dns_rdatatype_any);
5157
5158         if (rbtversion == NULL) {
5159                 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5160                 close_version = ISC_TRUE;
5161         }
5162         serial = rbtversion->serial;
5163         now = 0;
5164
5165         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5166                   isc_rwlocktype_read);
5167
5168         found = NULL;
5169         foundsig = NULL;
5170         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5171         if (covers == 0)
5172                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5173         else
5174                 sigmatchtype = 0;
5175
5176         for (header = rbtnode->data; header != NULL; header = header_next) {
5177                 header_next = header->next;
5178                 do {
5179                         if (header->serial <= serial &&
5180                             !IGNORE(header)) {
5181                                 /*
5182                                  * Is this a "this rdataset doesn't
5183                                  * exist" record?
5184                                  */
5185                                 if (NONEXISTENT(header))
5186                                         header = NULL;
5187                                 break;
5188                         } else
5189                                 header = header->down;
5190                 } while (header != NULL);
5191                 if (header != NULL) {
5192                         /*
5193                          * We have an active, extant rdataset.  If it's a
5194                          * type we're looking for, remember it.
5195                          */
5196                         if (header->type == matchtype) {
5197                                 found = header;
5198                                 if (foundsig != NULL)
5199                                         break;
5200                         } else if (header->type == sigmatchtype) {
5201                                 foundsig = header;
5202                                 if (found != NULL)
5203                                         break;
5204                         }
5205                 }
5206         }
5207         if (found != NULL) {
5208                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5209                 if (foundsig != NULL)
5210                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5211                                       sigrdataset);
5212         }
5213
5214         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5215                     isc_rwlocktype_read);
5216
5217         if (close_version)
5218                 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5219                              ISC_FALSE);
5220
5221         if (found == NULL)
5222                 return (ISC_R_NOTFOUND);
5223
5224         return (ISC_R_SUCCESS);
5225 }
5226
5227 static isc_result_t
5228 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5229                    dns_rdatatype_t type, dns_rdatatype_t covers,
5230                    isc_stdtime_t now, dns_rdataset_t *rdataset,
5231                    dns_rdataset_t *sigrdataset)
5232 {
5233         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5234         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5235         rdatasetheader_t *header, *header_next, *found, *foundsig;
5236         rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5237         isc_result_t result;
5238         nodelock_t *lock;
5239         isc_rwlocktype_t locktype;
5240
5241         REQUIRE(VALID_RBTDB(rbtdb));
5242         REQUIRE(type != dns_rdatatype_any);
5243
5244         UNUSED(version);
5245
5246         result = ISC_R_SUCCESS;
5247
5248         if (now == 0)
5249                 isc_stdtime_get(&now);
5250
5251         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5252         locktype = isc_rwlocktype_read;
5253         NODE_LOCK(lock, locktype);
5254
5255         found = NULL;
5256         foundsig = NULL;
5257         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5258         negtype = RBTDB_RDATATYPE_VALUE(0, type);
5259         if (covers == 0)
5260                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5261         else
5262                 sigmatchtype = 0;
5263
5264         for (header = rbtnode->data; header != NULL; header = header_next) {
5265                 header_next = header->next;
5266                 if (header->rdh_ttl <= now) {
5267                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5268                             (locktype == isc_rwlocktype_write ||
5269                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5270                                 /*
5271                                  * We update the node's status only when we
5272                                  * can get write access.
5273                                  */
5274                                 locktype = isc_rwlocktype_write;
5275
5276                                 /*
5277                                  * We don't check if refcurrent(rbtnode) == 0
5278                                  * and try to free like we do in cache_find(),
5279                                  * because refcurrent(rbtnode) must be
5280                                  * non-zero.  This is so because 'node' is an
5281                                  * argument to the function.
5282                                  */
5283                                 header->attributes |= RDATASET_ATTR_STALE;
5284                                 rbtnode->dirty = 1;
5285                         }
5286                 } else if (EXISTS(header)) {
5287                         if (header->type == matchtype)
5288                                 found = header;
5289                         else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5290                                  header->type == negtype)
5291                                 found = header;
5292                         else if (header->type == sigmatchtype)
5293                                 foundsig = header;
5294                 }
5295         }
5296         if (found != NULL) {
5297                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5298                 if (foundsig != NULL)
5299                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5300                                       sigrdataset);
5301         }
5302
5303         NODE_UNLOCK(lock, locktype);
5304
5305         if (found == NULL)
5306                 return (ISC_R_NOTFOUND);
5307
5308         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
5309                 /*
5310                  * We found a negative cache entry.
5311                  */
5312                 if (NXDOMAIN(found))
5313                         result = DNS_R_NCACHENXDOMAIN;
5314                 else
5315                         result = DNS_R_NCACHENXRRSET;
5316         }
5317
5318         return (result);
5319 }
5320
5321 static isc_result_t
5322 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5323              isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5324 {
5325         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5326         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5327         rbtdb_version_t *rbtversion = version;
5328         rbtdb_rdatasetiter_t *iterator;
5329         unsigned int refs;
5330
5331         REQUIRE(VALID_RBTDB(rbtdb));
5332
5333         iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5334         if (iterator == NULL)
5335                 return (ISC_R_NOMEMORY);
5336
5337         if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5338                 now = 0;
5339                 if (rbtversion == NULL)
5340                         currentversion(db,
5341                                  (dns_dbversion_t **) (void *)(&rbtversion));
5342                 else {
5343                         unsigned int refs;
5344
5345                         isc_refcount_increment(&rbtversion->references,
5346                                                &refs);
5347                         INSIST(refs > 1);
5348                 }
5349         } else {
5350                 if (now == 0)
5351                         isc_stdtime_get(&now);
5352                 rbtversion = NULL;
5353         }
5354
5355         iterator->common.magic = DNS_RDATASETITER_MAGIC;
5356         iterator->common.methods = &rdatasetiter_methods;
5357         iterator->common.db = db;
5358         iterator->common.node = node;
5359         iterator->common.version = (dns_dbversion_t *)rbtversion;
5360         iterator->common.now = now;
5361
5362         NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5363
5364         dns_rbtnode_refincrement(rbtnode, &refs);
5365         INSIST(refs != 0);
5366
5367         iterator->current = NULL;
5368
5369         NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5370
5371         *iteratorp = (dns_rdatasetiter_t *)iterator;
5372
5373         return (ISC_R_SUCCESS);
5374 }
5375
5376 static isc_boolean_t
5377 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5378         rdatasetheader_t *header, *header_next;
5379         isc_boolean_t cname, other_data;
5380         dns_rdatatype_t rdtype;
5381
5382         /*
5383          * The caller must hold the node lock.
5384          */
5385
5386         /*
5387          * Look for CNAME and "other data" rdatasets active in our version.
5388          */
5389         cname = ISC_FALSE;
5390         other_data = ISC_FALSE;
5391         for (header = node->data; header != NULL; header = header_next) {
5392                 header_next = header->next;
5393                 if (header->type == dns_rdatatype_cname) {
5394                         /*
5395                          * Look for an active extant CNAME.
5396                          */
5397                         do {
5398                                 if (header->serial <= serial &&
5399                                     !IGNORE(header)) {
5400                                         /*
5401                                          * Is this a "this rdataset doesn't
5402                                          * exist" record?
5403                                          */
5404                                         if (NONEXISTENT(header))
5405                                                 header = NULL;
5406                                         break;
5407                                 } else
5408                                         header = header->down;
5409                         } while (header != NULL);
5410                         if (header != NULL)
5411                                 cname = ISC_TRUE;
5412                 } else {
5413                         /*
5414                          * Look for active extant "other data".
5415                          *
5416                          * "Other data" is any rdataset whose type is not
5417                          * KEY, NSEC, SIG or RRSIG.
5418                          */
5419                         rdtype = RBTDB_RDATATYPE_BASE(header->type);
5420                         if (rdtype != dns_rdatatype_key &&
5421                             rdtype != dns_rdatatype_sig &&
5422                             rdtype != dns_rdatatype_nsec &&
5423                             rdtype != dns_rdatatype_rrsig) {
5424                                 /*
5425                                  * Is it active and extant?
5426                                  */
5427                                 do {
5428                                         if (header->serial <= serial &&
5429                                             !IGNORE(header)) {
5430                                                 /*
5431                                                  * Is this a "this rdataset
5432                                                  * doesn't exist" record?
5433                                                  */
5434                                                 if (NONEXISTENT(header))
5435                                                         header = NULL;
5436                                                 break;
5437                                         } else
5438                                                 header = header->down;
5439                                 } while (header != NULL);
5440                                 if (header != NULL)
5441                                         other_data = ISC_TRUE;
5442                         }
5443                 }
5444         }
5445
5446         if (cname && other_data)
5447                 return (ISC_TRUE);
5448
5449         return (ISC_FALSE);
5450 }
5451
5452 static isc_result_t
5453 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5454         isc_result_t result;
5455
5456         INSIST(!IS_CACHE(rbtdb));
5457         INSIST(newheader->heap_index == 0);
5458         INSIST(!ISC_LINK_LINKED(newheader, link));
5459
5460         result = isc_heap_insert(rbtdb->heaps[idx], newheader);
5461         return (result);
5462 }
5463
5464 static isc_result_t
5465 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5466     rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5467     dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5468 {
5469         rbtdb_changed_t *changed = NULL;
5470         rdatasetheader_t *topheader, *topheader_prev, *header, *sigheader;
5471         unsigned char *merged;
5472         isc_result_t result;
5473         isc_boolean_t header_nx;
5474         isc_boolean_t newheader_nx;
5475         isc_boolean_t merge;
5476         dns_rdatatype_t rdtype, covers;
5477         rbtdb_rdatatype_t negtype, sigtype;
5478         dns_trust_t trust;
5479         int idx;
5480
5481         /*
5482          * Add an rdatasetheader_t to a node.
5483          */
5484
5485         /*
5486          * Caller must be holding the node lock.
5487          */
5488
5489         if ((options & DNS_DBADD_MERGE) != 0) {
5490                 REQUIRE(rbtversion != NULL);
5491                 merge = ISC_TRUE;
5492         } else
5493                 merge = ISC_FALSE;
5494
5495         if ((options & DNS_DBADD_FORCE) != 0)
5496                 trust = dns_trust_ultimate;
5497         else
5498                 trust = newheader->trust;
5499
5500         if (rbtversion != NULL && !loading) {
5501                 /*
5502                  * We always add a changed record, even if no changes end up
5503                  * being made to this node, because it's harmless and
5504                  * simplifies the code.
5505                  */
5506                 changed = add_changed(rbtdb, rbtversion, rbtnode);
5507                 if (changed == NULL) {
5508                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5509                         return (ISC_R_NOMEMORY);
5510                 }
5511         }
5512
5513         newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5514         topheader_prev = NULL;
5515         sigheader = NULL;
5516         negtype = 0;
5517         if (rbtversion == NULL && !newheader_nx) {
5518                 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5519                 if (rdtype == 0) {
5520                         /*
5521                          * We're adding a negative cache entry.
5522                          */
5523                         covers = RBTDB_RDATATYPE_EXT(newheader->type);
5524                         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
5525                                                         covers);
5526                         for (topheader = rbtnode->data;
5527                              topheader != NULL;
5528                              topheader = topheader->next) {
5529                                 /*
5530                                  * If we're adding an negative cache entry
5531                                  * which covers all types (NXDOMAIN,
5532                                  * NODATA(QTYPE=ANY)).
5533                                  *
5534                                  * We make all other data stale so that the
5535                                  * only rdataset that can be found at this
5536                                  * node is the negative cache entry.
5537                                  *
5538                                  * Otherwise look for any RRSIGs of the
5539                                  * given type so they can be marked stale
5540                                  * later.
5541                                  */
5542                                 if (covers == dns_rdatatype_any) {
5543                                         set_ttl(rbtdb, topheader, 0);
5544                                         topheader->attributes |=
5545                                                 RDATASET_ATTR_STALE;
5546                                         rbtnode->dirty = 1;
5547                                 } else if (topheader->type == sigtype)
5548                                         sigheader = topheader;
5549                         }
5550                         if (covers == dns_rdatatype_any)
5551                                 goto find_header;
5552                         negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5553                 } else {
5554                         /*
5555                          * We're adding something that isn't a
5556                          * negative cache entry.  Look for an extant
5557                          * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5558                          * cache entry.
5559                          */
5560                         for (topheader = rbtnode->data;
5561                              topheader != NULL;
5562                              topheader = topheader->next) {
5563                                 if (topheader->type ==
5564                                     RBTDB_RDATATYPE_NCACHEANY)
5565                                         break;
5566                         }
5567                         if (topheader != NULL && EXISTS(topheader) &&
5568                             topheader->rdh_ttl > now) {
5569                                 /*
5570                                  * Found one.
5571                                  */
5572                                 if (trust < topheader->trust) {
5573                                         /*
5574                                          * The NXDOMAIN/NODATA(QTYPE=ANY)
5575                                          * is more trusted.
5576                                          */
5577                                         free_rdataset(rbtdb,
5578                                                       rbtdb->common.mctx,
5579                                                       newheader);
5580                                         if (addedrdataset != NULL)
5581                                                 bind_rdataset(rbtdb, rbtnode,
5582                                                               topheader, now,
5583                                                               addedrdataset);
5584                                         return (DNS_R_UNCHANGED);
5585                                 }
5586                                 /*
5587                                  * The new rdataset is better.  Expire the
5588                                  * NXDOMAIN/NODATA(QTYPE=ANY).
5589                                  */
5590                                 set_ttl(rbtdb, topheader, 0);
5591                                 topheader->attributes |= RDATASET_ATTR_STALE;
5592                                 rbtnode->dirty = 1;
5593                                 topheader = NULL;
5594                                 goto find_header;
5595                         }
5596                         negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5597                 }
5598         }
5599
5600         for (topheader = rbtnode->data;
5601              topheader != NULL;
5602              topheader = topheader->next) {
5603                 if (topheader->type == newheader->type ||
5604                     topheader->type == negtype)
5605                         break;
5606                 topheader_prev = topheader;
5607         }
5608
5609  find_header:
5610         /*
5611          * If header isn't NULL, we've found the right type.  There may be
5612          * IGNORE rdatasets between the top of the chain and the first real
5613          * data.  We skip over them.
5614          */
5615         header = topheader;
5616         while (header != NULL && IGNORE(header))
5617                 header = header->down;
5618         if (header != NULL) {
5619                 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
5620
5621                 /*
5622                  * Deleting an already non-existent rdataset has no effect.
5623                  */
5624                 if (header_nx && newheader_nx) {
5625                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5626                         return (DNS_R_UNCHANGED);
5627                 }
5628
5629                 /*
5630                  * Trying to add an rdataset with lower trust to a cache DB
5631                  * has no effect, provided that the cache data isn't stale.
5632                  */
5633                 if (rbtversion == NULL && trust < header->trust &&
5634                     (header->rdh_ttl > now || header_nx)) {
5635                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5636                         if (addedrdataset != NULL)
5637                                 bind_rdataset(rbtdb, rbtnode, header, now,
5638                                               addedrdataset);
5639                         return (DNS_R_UNCHANGED);
5640                 }
5641
5642                 /*
5643                  * Don't merge if a nonexistent rdataset is involved.
5644                  */
5645                 if (merge && (header_nx || newheader_nx))
5646                         merge = ISC_FALSE;
5647
5648                 /*
5649                  * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
5650                  * that is the union of 'newheader' and 'header'.
5651                  */
5652                 if (merge) {
5653                         unsigned int flags = 0;
5654                         INSIST(rbtversion->serial >= header->serial);
5655                         merged = NULL;
5656                         result = ISC_R_SUCCESS;
5657
5658                         if ((options & DNS_DBADD_EXACT) != 0)
5659                                 flags |= DNS_RDATASLAB_EXACT;
5660                         if ((options & DNS_DBADD_EXACTTTL) != 0 &&
5661                              newheader->rdh_ttl != header->rdh_ttl)
5662                                         result = DNS_R_NOTEXACT;
5663                         else if (newheader->rdh_ttl != header->rdh_ttl)
5664                                 flags |= DNS_RDATASLAB_FORCE;
5665                         if (result == ISC_R_SUCCESS)
5666                                 result = dns_rdataslab_merge(
5667                                              (unsigned char *)header,
5668                                              (unsigned char *)newheader,
5669                                              (unsigned int)(sizeof(*newheader)),
5670                                              rbtdb->common.mctx,
5671                                              rbtdb->common.rdclass,
5672                                              (dns_rdatatype_t)header->type,
5673                                              flags, &merged);
5674                         if (result == ISC_R_SUCCESS) {
5675                                 /*
5676                                  * If 'header' has the same serial number as
5677                                  * we do, we could clean it up now if we knew
5678                                  * that our caller had no references to it.
5679                                  * We don't know this, however, so we leave it
5680                                  * alone.  It will get cleaned up when
5681                                  * clean_zone_node() runs.
5682                                  */
5683                                 free_rdataset(rbtdb, rbtdb->common.mctx,
5684                                               newheader);
5685                                 newheader = (rdatasetheader_t *)merged;
5686                                 if (loading && RESIGN(newheader) &&
5687                                     RESIGN(header) &&
5688                                     header->resign < newheader->resign)
5689                                         newheader->resign = header->resign;
5690                         } else {
5691                                 free_rdataset(rbtdb, rbtdb->common.mctx,
5692                                               newheader);
5693                                 return (result);
5694                         }
5695                 }
5696                 /*
5697                  * Don't replace existing NS, A and AAAA RRsets
5698                  * in the cache if they already exist.  This
5699                  * prevents named being locked to old servers.
5700                  * Don't lower trust of existing record if the
5701                  * update is forced.
5702                  */
5703                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5704                     header->type == dns_rdatatype_ns &&
5705                     !header_nx && !newheader_nx &&
5706                     header->trust >= newheader->trust &&
5707                     dns_rdataslab_equalx((unsigned char *)header,
5708                                          (unsigned char *)newheader,
5709                                          (unsigned int)(sizeof(*newheader)),
5710                                          rbtdb->common.rdclass,
5711                                          (dns_rdatatype_t)header->type)) {
5712                         /*
5713                          * Honour the new ttl if it is less than the
5714                          * older one.
5715                          */
5716                         if (header->rdh_ttl > newheader->rdh_ttl)
5717                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
5718                         if (header->noqname == NULL &&
5719                             newheader->noqname != NULL) {
5720                                 header->noqname = newheader->noqname;
5721                                 newheader->noqname = NULL;
5722                         }
5723                         if (header->closest == NULL &&
5724                             newheader->closest != NULL) {
5725                                 header->closest = newheader->closest;
5726                                 newheader->closest = NULL;
5727                         }
5728                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5729                         if (addedrdataset != NULL)
5730                                 bind_rdataset(rbtdb, rbtnode, header, now,
5731                                               addedrdataset);
5732                         return (ISC_R_SUCCESS);
5733                 }
5734                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5735                     (header->type == dns_rdatatype_a ||
5736                      header->type == dns_rdatatype_aaaa) &&
5737                     !header_nx && !newheader_nx &&
5738                     header->trust >= newheader->trust &&
5739                     dns_rdataslab_equal((unsigned char *)header,
5740                                         (unsigned char *)newheader,
5741                                         (unsigned int)(sizeof(*newheader)))) {
5742                         /*
5743                          * Honour the new ttl if it is less than the
5744                          * older one.
5745                          */
5746                         if (header->rdh_ttl > newheader->rdh_ttl)
5747                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
5748                         if (header->noqname == NULL &&
5749                             newheader->noqname != NULL) {
5750                                 header->noqname = newheader->noqname;
5751                                 newheader->noqname = NULL;
5752                         }
5753                         if (header->closest == NULL &&
5754                             newheader->closest != NULL) {
5755                                 header->closest = newheader->closest;
5756                                 newheader->closest = NULL;
5757                         }
5758                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5759                         if (addedrdataset != NULL)
5760                                 bind_rdataset(rbtdb, rbtnode, header, now,
5761                                               addedrdataset);
5762                         return (ISC_R_SUCCESS);
5763                 }
5764                 INSIST(rbtversion == NULL ||
5765                        rbtversion->serial >= topheader->serial);
5766                 if (topheader_prev != NULL)
5767                         topheader_prev->next = newheader;
5768                 else
5769                         rbtnode->data = newheader;
5770                 newheader->next = topheader->next;
5771                 if (loading) {
5772                         /*
5773                          * There are no other references to 'header' when
5774                          * loading, so we MAY clean up 'header' now.
5775                          * Since we don't generate changed records when
5776                          * loading, we MUST clean up 'header' now.
5777                          */
5778                         newheader->down = NULL;
5779                         free_rdataset(rbtdb, rbtdb->common.mctx, header);
5780                 } else {
5781                         newheader->down = topheader;
5782                         topheader->next = newheader;
5783                         rbtnode->dirty = 1;
5784                         if (changed != NULL)
5785                                 changed->dirty = ISC_TRUE;
5786                         if (rbtversion == NULL) {
5787                                 set_ttl(rbtdb, header, 0);
5788                                 header->attributes |= RDATASET_ATTR_STALE;
5789                                 if (sigheader != NULL) {
5790                                         set_ttl(rbtdb, sigheader, 0);
5791                                         sigheader->attributes |=
5792                                                  RDATASET_ATTR_STALE;
5793                                 }
5794                         }
5795                         idx = newheader->node->locknum;
5796                         if (IS_CACHE(rbtdb)) {
5797                                 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5798                                                  newheader, link);
5799                                 /*
5800                                  * XXXMLG We don't check the return value
5801                                  * here.  If it fails, we will not do TTL
5802                                  * based expiry on this node.  However, we
5803                                  * will do it on the LRU side, so memory
5804                                  * will not leak... for long.
5805                                  */
5806                                 isc_heap_insert(rbtdb->heaps[idx], newheader);
5807                         } else if (RESIGN(newheader))
5808                                 resign_insert(rbtdb, idx, newheader);
5809                 }
5810         } else {
5811                 /*
5812                  * No non-IGNORED rdatasets of the given type exist at
5813                  * this node.
5814                  */
5815
5816                 /*
5817                  * If we're trying to delete the type, don't bother.
5818                  */
5819                 if (newheader_nx) {
5820                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5821                         return (DNS_R_UNCHANGED);
5822                 }
5823
5824                 if (topheader != NULL) {
5825                         /*
5826                          * We have a list of rdatasets of the given type,
5827                          * but they're all marked IGNORE.  We simply insert
5828                          * the new rdataset at the head of the list.
5829                          *
5830                          * Ignored rdatasets cannot occur during loading, so
5831                          * we INSIST on it.
5832                          */
5833                         INSIST(!loading);
5834                         INSIST(rbtversion == NULL ||
5835                                rbtversion->serial >= topheader->serial);
5836                         if (topheader_prev != NULL)
5837                                 topheader_prev->next = newheader;
5838                         else
5839                                 rbtnode->data = newheader;
5840                         newheader->next = topheader->next;
5841                         newheader->down = topheader;
5842                         topheader->next = newheader;
5843                         rbtnode->dirty = 1;
5844                         if (changed != NULL)
5845                                 changed->dirty = ISC_TRUE;
5846                 } else {
5847                         /*
5848                          * No rdatasets of the given type exist at the node.
5849                          */
5850                         newheader->next = rbtnode->data;
5851                         newheader->down = NULL;
5852                         rbtnode->data = newheader;
5853                 }
5854                 idx = newheader->node->locknum;
5855                 if (IS_CACHE(rbtdb)) {
5856                         ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5857                                          newheader, link);
5858                         isc_heap_insert(rbtdb->heaps[idx], newheader);
5859                 } else if (RESIGN(newheader)) {
5860                         resign_insert(rbtdb, idx, newheader);
5861                 }
5862         }
5863
5864         /*
5865          * Check if the node now contains CNAME and other data.
5866          */
5867         if (rbtversion != NULL &&
5868             cname_and_other_data(rbtnode, rbtversion->serial))
5869                 return (DNS_R_CNAMEANDOTHER);
5870
5871         if (addedrdataset != NULL)
5872                 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
5873
5874         return (ISC_R_SUCCESS);
5875 }
5876
5877 static inline isc_boolean_t
5878 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
5879                 rbtdb_rdatatype_t type)
5880 {
5881         if (IS_CACHE(rbtdb)) {
5882                 if (type == dns_rdatatype_dname)
5883                         return (ISC_TRUE);
5884                 else
5885                         return (ISC_FALSE);
5886         } else if (type == dns_rdatatype_dname ||
5887                    (type == dns_rdatatype_ns &&
5888                     (node != rbtdb->origin_node || IS_STUB(rbtdb))))
5889                 return (ISC_TRUE);
5890         return (ISC_FALSE);
5891 }
5892
5893 static inline isc_result_t
5894 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5895            dns_rdataset_t *rdataset)
5896 {
5897         struct noqname *noqname;
5898         isc_mem_t *mctx = rbtdb->common.mctx;
5899         dns_name_t name;
5900         dns_rdataset_t neg, negsig;
5901         isc_result_t result;
5902         isc_region_t r;
5903
5904         dns_name_init(&name, NULL);
5905         dns_rdataset_init(&neg);
5906         dns_rdataset_init(&negsig);
5907
5908         result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
5909         RUNTIME_CHECK(result == ISC_R_SUCCESS);
5910
5911         noqname = isc_mem_get(mctx, sizeof(*noqname));
5912         if (noqname == NULL) {
5913                 result = ISC_R_NOMEMORY;
5914                 goto cleanup;
5915         }
5916         dns_name_init(&noqname->name, NULL);
5917         noqname->neg = NULL;
5918         noqname->negsig = NULL;
5919         noqname->type = neg.type;
5920         result = dns_name_dup(&name, mctx, &noqname->name);
5921         if (result != ISC_R_SUCCESS)
5922                 goto cleanup;
5923         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5924         if (result != ISC_R_SUCCESS)
5925                 goto cleanup;
5926         noqname->neg = r.base;
5927         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5928         if (result != ISC_R_SUCCESS)
5929                 goto cleanup;
5930         noqname->negsig = r.base;
5931         dns_rdataset_disassociate(&neg);
5932         dns_rdataset_disassociate(&negsig);
5933         newheader->noqname = noqname;
5934         return (ISC_R_SUCCESS);
5935
5936 cleanup:
5937         dns_rdataset_disassociate(&neg);
5938         dns_rdataset_disassociate(&negsig);
5939         free_noqname(mctx, &noqname);
5940         return(result);
5941 }
5942
5943 static inline isc_result_t
5944 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5945            dns_rdataset_t *rdataset)
5946 {
5947         struct noqname *closest;
5948         isc_mem_t *mctx = rbtdb->common.mctx;
5949         dns_name_t name;
5950         dns_rdataset_t neg, negsig;
5951         isc_result_t result;
5952         isc_region_t r;
5953
5954         dns_name_init(&name, NULL);
5955         dns_rdataset_init(&neg);
5956         dns_rdataset_init(&negsig);
5957
5958         result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
5959         RUNTIME_CHECK(result == ISC_R_SUCCESS);
5960
5961         closest = isc_mem_get(mctx, sizeof(*closest));
5962         if (closest == NULL) {
5963                 result = ISC_R_NOMEMORY;
5964                 goto cleanup;
5965         }
5966         dns_name_init(&closest->name, NULL);
5967         closest->neg = NULL;
5968         closest->negsig = NULL;
5969         closest->type = neg.type;
5970         result = dns_name_dup(&name, mctx, &closest->name);
5971         if (result != ISC_R_SUCCESS)
5972                 goto cleanup;
5973         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5974         if (result != ISC_R_SUCCESS)
5975                 goto cleanup;
5976         closest->neg = r.base;
5977         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5978         if (result != ISC_R_SUCCESS)
5979                 goto cleanup;
5980         closest->negsig = r.base;
5981         dns_rdataset_disassociate(&neg);
5982         dns_rdataset_disassociate(&negsig);
5983         newheader->closest = closest;
5984         return (ISC_R_SUCCESS);
5985
5986  cleanup:
5987         dns_rdataset_disassociate(&neg);
5988         dns_rdataset_disassociate(&negsig);
5989         free_noqname(mctx, &closest);
5990         return(result);
5991 }
5992
5993 static dns_dbmethods_t zone_methods;
5994
5995 static isc_result_t
5996 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5997             isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
5998             dns_rdataset_t *addedrdataset)
5999 {
6000         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6001         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6002         rbtdb_version_t *rbtversion = version;
6003         isc_region_t region;
6004         rdatasetheader_t *newheader;
6005         rdatasetheader_t *header;
6006         isc_result_t result;
6007         isc_boolean_t delegating;
6008         isc_boolean_t tree_locked = ISC_FALSE;
6009         isc_boolean_t cache_is_overmem = ISC_FALSE;
6010
6011         REQUIRE(VALID_RBTDB(rbtdb));
6012
6013         if (rbtdb->common.methods == &zone_methods)
6014                 REQUIRE(((rbtnode->nsec3 &&
6015                           (rdataset->type == dns_rdatatype_nsec3 ||
6016                            rdataset->covers == dns_rdatatype_nsec3)) ||
6017                          (!rbtnode->nsec3 &&
6018                            rdataset->type != dns_rdatatype_nsec3 &&
6019                            rdataset->covers != dns_rdatatype_nsec3)));
6020
6021         if (rbtversion == NULL) {
6022                 if (now == 0)
6023                         isc_stdtime_get(&now);
6024         } else
6025                 now = 0;
6026
6027         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6028                                             &region,
6029                                             sizeof(rdatasetheader_t));
6030         if (result != ISC_R_SUCCESS)
6031                 return (result);
6032
6033         newheader = (rdatasetheader_t *)region.base;
6034         init_rdataset(rbtdb, newheader);
6035         set_ttl(rbtdb, newheader, rdataset->ttl + now);
6036         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6037                                                 rdataset->covers);
6038         newheader->attributes = 0;
6039         newheader->noqname = NULL;
6040         newheader->closest = NULL;
6041         newheader->count = init_count++;
6042         newheader->trust = rdataset->trust;
6043         newheader->additional_auth = NULL;
6044         newheader->additional_glue = NULL;
6045         newheader->last_used = now;
6046         newheader->node = rbtnode;
6047         if (rbtversion != NULL) {
6048                 newheader->serial = rbtversion->serial;
6049                 now = 0;
6050
6051                 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6052                         newheader->attributes |= RDATASET_ATTR_RESIGN;
6053                         newheader->resign = rdataset->resign;
6054                 } else
6055                         newheader->resign = 0;
6056         } else {
6057                 newheader->serial = 1;
6058                 newheader->resign = 0;
6059                 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
6060                         newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6061                 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6062                         newheader->attributes |= RDATASET_ATTR_OPTOUT;
6063                 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6064                         result = addnoqname(rbtdb, newheader, rdataset);
6065                         if (result != ISC_R_SUCCESS) {
6066                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6067                                               newheader);
6068                                 return (result);
6069                         }
6070                 }
6071                 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6072                         result = addclosest(rbtdb, newheader, rdataset);
6073                         if (result != ISC_R_SUCCESS) {
6074                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6075                                               newheader);
6076                                 return (result);
6077                         }
6078                 }
6079         }
6080
6081         /*
6082          * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6083          * just DNAME for the cache), then we need to set the callback bit
6084          * on the node.
6085          */
6086         if (delegating_type(rbtdb, rbtnode, rdataset->type))
6087                 delegating = ISC_TRUE;
6088         else
6089                 delegating = ISC_FALSE;
6090
6091         /*
6092          * If we're adding a delegation type or the DB is a cache in an overmem
6093          * state, hold an exclusive lock on the tree.  In the latter case
6094          * the lock does not necessarily have to be acquired but it will help
6095          * purge stale entries more effectively.
6096          */
6097         if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx))
6098                 cache_is_overmem = ISC_TRUE;
6099         if (delegating || cache_is_overmem) {
6100                 tree_locked = ISC_TRUE;
6101                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6102         }
6103
6104         if (cache_is_overmem)
6105                 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6106
6107         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6108                   isc_rwlocktype_write);
6109
6110         if (rbtdb->rrsetstats != NULL) {
6111                 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6112                 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6113         }
6114
6115         if (IS_CACHE(rbtdb)) {
6116                 if (tree_locked)
6117                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6118
6119                 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6120                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6121                         expire_header(rbtdb, header, tree_locked);
6122
6123                 /*
6124                  * If we've been holding a write lock on the tree just for
6125                  * cleaning, we can release it now.  However, we still need the
6126                  * node lock.
6127                  */
6128                 if (tree_locked && !delegating) {
6129                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6130                         tree_locked = ISC_FALSE;
6131                 }
6132         }
6133
6134         result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
6135                      addedrdataset, now);
6136         if (result == ISC_R_SUCCESS && delegating)
6137                 rbtnode->find_callback = 1;
6138
6139         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6140                     isc_rwlocktype_write);
6141
6142         if (tree_locked)
6143                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6144
6145         /*
6146          * Update the zone's secure status.  If version is non-NULL
6147          * this is deferred until closeversion() is called.
6148          */
6149         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6150                 iszonesecure(db, version, rbtdb->origin_node);
6151
6152         return (result);
6153 }
6154
6155 static isc_result_t
6156 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6157                  dns_rdataset_t *rdataset, unsigned int options,
6158                  dns_rdataset_t *newrdataset)
6159 {
6160         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6161         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6162         rbtdb_version_t *rbtversion = version;
6163         rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6164         unsigned char *subresult;
6165         isc_region_t region;
6166         isc_result_t result;
6167         rbtdb_changed_t *changed;
6168
6169         REQUIRE(VALID_RBTDB(rbtdb));
6170
6171         if (rbtdb->common.methods == &zone_methods)
6172                 REQUIRE(((rbtnode->nsec3 &&
6173                           (rdataset->type == dns_rdatatype_nsec3 ||
6174                            rdataset->covers == dns_rdatatype_nsec3)) ||
6175                          (!rbtnode->nsec3 &&
6176                            rdataset->type != dns_rdatatype_nsec3 &&
6177                            rdataset->covers != dns_rdatatype_nsec3)));
6178
6179         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6180                                             &region,
6181                                             sizeof(rdatasetheader_t));
6182         if (result != ISC_R_SUCCESS)
6183                 return (result);
6184         newheader = (rdatasetheader_t *)region.base;
6185         init_rdataset(rbtdb, newheader);
6186         set_ttl(rbtdb, newheader, rdataset->ttl);
6187         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6188                                                 rdataset->covers);
6189         newheader->attributes = 0;
6190         newheader->serial = rbtversion->serial;
6191         newheader->trust = 0;
6192         newheader->noqname = NULL;
6193         newheader->closest = NULL;
6194         newheader->count = init_count++;
6195         newheader->additional_auth = NULL;
6196         newheader->additional_glue = NULL;
6197         newheader->last_used = 0;
6198         newheader->node = rbtnode;
6199         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6200                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6201                 newheader->resign = rdataset->resign;
6202         } else
6203                 newheader->resign = 0;
6204
6205         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6206                   isc_rwlocktype_write);
6207
6208         changed = add_changed(rbtdb, rbtversion, rbtnode);
6209         if (changed == NULL) {
6210                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6211                 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6212                             isc_rwlocktype_write);
6213                 return (ISC_R_NOMEMORY);
6214         }
6215
6216         topheader_prev = NULL;
6217         for (topheader = rbtnode->data;
6218              topheader != NULL;
6219              topheader = topheader->next) {
6220                 if (topheader->type == newheader->type)
6221                         break;
6222                 topheader_prev = topheader;
6223         }
6224         /*
6225          * If header isn't NULL, we've found the right type.  There may be
6226          * IGNORE rdatasets between the top of the chain and the first real
6227          * data.  We skip over them.
6228          */
6229         header = topheader;
6230         while (header != NULL && IGNORE(header))
6231                 header = header->down;
6232         if (header != NULL && EXISTS(header)) {
6233                 unsigned int flags = 0;
6234                 subresult = NULL;
6235                 result = ISC_R_SUCCESS;
6236                 if ((options & DNS_DBSUB_EXACT) != 0) {
6237                         flags |= DNS_RDATASLAB_EXACT;
6238                         if (newheader->rdh_ttl != header->rdh_ttl)
6239                                 result = DNS_R_NOTEXACT;
6240                 }
6241                 if (result == ISC_R_SUCCESS)
6242                         result = dns_rdataslab_subtract(
6243                                         (unsigned char *)header,
6244                                         (unsigned char *)newheader,
6245                                         (unsigned int)(sizeof(*newheader)),
6246                                         rbtdb->common.mctx,
6247                                         rbtdb->common.rdclass,
6248                                         (dns_rdatatype_t)header->type,
6249                                         flags, &subresult);
6250                 if (result == ISC_R_SUCCESS) {
6251                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6252                         newheader = (rdatasetheader_t *)subresult;
6253                         init_rdataset(rbtdb, newheader);
6254                         /*
6255                          * We have to set the serial since the rdataslab
6256                          * subtraction routine copies the reserved portion of
6257                          * header, not newheader.
6258                          */
6259                         newheader->serial = rbtversion->serial;
6260                         /*
6261                          * XXXJT: dns_rdataslab_subtract() copied the pointers
6262                          * to additional info.  We need to clear these fields
6263                          * to avoid having duplicated references.
6264                          */
6265                         newheader->additional_auth = NULL;
6266                         newheader->additional_glue = NULL;
6267                 } else if (result == DNS_R_NXRRSET) {
6268                         /*
6269                          * This subtraction would remove all of the rdata;
6270                          * add a nonexistent header instead.
6271                          */
6272                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6273                         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6274                         if (newheader == NULL) {
6275                                 result = ISC_R_NOMEMORY;
6276                                 goto unlock;
6277                         }
6278                         set_ttl(rbtdb, newheader, 0);
6279                         newheader->type = topheader->type;
6280                         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6281                         newheader->trust = 0;
6282                         newheader->serial = rbtversion->serial;
6283                         newheader->noqname = NULL;
6284                         newheader->closest = NULL;
6285                         newheader->count = 0;
6286                         newheader->additional_auth = NULL;
6287                         newheader->additional_glue = NULL;
6288                         newheader->node = rbtnode;
6289                         newheader->resign = 0;
6290                         newheader->last_used = 0;
6291                 } else {
6292                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6293                         goto unlock;
6294                 }
6295
6296                 /*
6297                  * If we're here, we want to link newheader in front of
6298                  * topheader.
6299                  */
6300                 INSIST(rbtversion->serial >= topheader->serial);
6301                 if (topheader_prev != NULL)
6302                         topheader_prev->next = newheader;
6303                 else
6304                         rbtnode->data = newheader;
6305                 newheader->next = topheader->next;
6306                 newheader->down = topheader;
6307                 topheader->next = newheader;
6308                 rbtnode->dirty = 1;
6309                 changed->dirty = ISC_TRUE;
6310         } else {
6311                 /*
6312                  * The rdataset doesn't exist, so we don't need to do anything
6313                  * to satisfy the deletion request.
6314                  */
6315                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6316                 if ((options & DNS_DBSUB_EXACT) != 0)
6317                         result = DNS_R_NOTEXACT;
6318                 else
6319                         result = DNS_R_UNCHANGED;
6320         }
6321
6322         if (result == ISC_R_SUCCESS && newrdataset != NULL)
6323                 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6324
6325  unlock:
6326         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6327                     isc_rwlocktype_write);
6328
6329         /*
6330          * Update the zone's secure status.  If version is non-NULL
6331          * this is deferred until closeversion() is called.
6332          */
6333         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6334                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6335
6336         return (result);
6337 }
6338
6339 static isc_result_t
6340 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6341                dns_rdatatype_t type, dns_rdatatype_t covers)
6342 {
6343         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6344         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6345         rbtdb_version_t *rbtversion = version;
6346         isc_result_t result;
6347         rdatasetheader_t *newheader;
6348
6349         REQUIRE(VALID_RBTDB(rbtdb));
6350
6351         if (type == dns_rdatatype_any)
6352                 return (ISC_R_NOTIMPLEMENTED);
6353         if (type == dns_rdatatype_rrsig && covers == 0)
6354                 return (ISC_R_NOTIMPLEMENTED);
6355
6356         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6357         if (newheader == NULL)
6358                 return (ISC_R_NOMEMORY);
6359         set_ttl(rbtdb, newheader, 0);
6360         newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6361         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6362         newheader->trust = 0;
6363         newheader->noqname = NULL;
6364         newheader->closest = NULL;
6365         newheader->additional_auth = NULL;
6366         newheader->additional_glue = NULL;
6367         if (rbtversion != NULL)
6368                 newheader->serial = rbtversion->serial;
6369         else
6370                 newheader->serial = 0;
6371         newheader->count = 0;
6372         newheader->last_used = 0;
6373         newheader->node = rbtnode;
6374
6375         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6376                   isc_rwlocktype_write);
6377
6378         result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6379                      ISC_FALSE, NULL, 0);
6380
6381         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6382                     isc_rwlocktype_write);
6383
6384         /*
6385          * Update the zone's secure status.  If version is non-NULL
6386          * this is deferred until closeversion() is called.
6387          */
6388         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6389                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6390
6391         return (result);
6392 }
6393
6394 static isc_result_t
6395 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6396         rbtdb_load_t *loadctx = arg;
6397         dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6398         dns_rbtnode_t *node;
6399         isc_result_t result;
6400         isc_region_t region;
6401         rdatasetheader_t *newheader;
6402
6403         /*
6404          * This routine does no node locking.  See comments in
6405          * 'load' below for more information on loading and
6406          * locking.
6407          */
6408
6409
6410         /*
6411          * SOA records are only allowed at top of zone.
6412          */
6413         if (rdataset->type == dns_rdatatype_soa &&
6414             !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6415                 return (DNS_R_NOTZONETOP);
6416
6417         if (rdataset->type != dns_rdatatype_nsec3 &&
6418             rdataset->covers != dns_rdatatype_nsec3)
6419                 add_empty_wildcards(rbtdb, name);
6420
6421         if (dns_name_iswildcard(name)) {
6422                 /*
6423                  * NS record owners cannot legally be wild cards.
6424                  */
6425                 if (rdataset->type == dns_rdatatype_ns)
6426                         return (DNS_R_INVALIDNS);
6427                 /*
6428                  * NSEC3 record owners cannot legally be wild cards.
6429                  */
6430                 if (rdataset->type == dns_rdatatype_nsec3)
6431                         return (DNS_R_INVALIDNSEC3);
6432                 result = add_wildcard_magic(rbtdb, name);
6433                 if (result != ISC_R_SUCCESS)
6434                         return (result);
6435         }
6436
6437         node = NULL;
6438         if (rdataset->type == dns_rdatatype_nsec3 ||
6439             rdataset->covers == dns_rdatatype_nsec3) {
6440                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6441                 if (result == ISC_R_SUCCESS)
6442                         node->nsec3 = 1;
6443         } else {
6444                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
6445                 if (result == ISC_R_SUCCESS)
6446                         node->nsec3 = 0;
6447         }
6448         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
6449                 return (result);
6450         if (result != ISC_R_EXISTS) {
6451                 dns_name_t foundname;
6452                 dns_name_init(&foundname, NULL);
6453                 dns_rbt_namefromnode(node, &foundname);
6454 #ifdef DNS_RBT_USEHASH
6455                 node->locknum = node->hashval % rbtdb->node_lock_count;
6456 #else
6457                 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6458                         rbtdb->node_lock_count;
6459 #endif
6460         }
6461
6462         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6463                                             &region,
6464                                             sizeof(rdatasetheader_t));
6465         if (result != ISC_R_SUCCESS)
6466                 return (result);
6467         newheader = (rdatasetheader_t *)region.base;
6468         init_rdataset(rbtdb, newheader);
6469         set_ttl(rbtdb, newheader,
6470                 rdataset->ttl + loadctx->now); /* XXX overflow check */
6471         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6472                                                 rdataset->covers);
6473         newheader->attributes = 0;
6474         newheader->trust = rdataset->trust;
6475         newheader->serial = 1;
6476         newheader->noqname = NULL;
6477         newheader->closest = NULL;
6478         newheader->count = init_count++;
6479         newheader->additional_auth = NULL;
6480         newheader->additional_glue = NULL;
6481         newheader->last_used = 0;
6482         newheader->node = node;
6483         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6484                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6485                 newheader->resign = rdataset->resign;
6486         } else
6487                 newheader->resign = 0;
6488
6489         result = add(rbtdb, node, rbtdb->current_version, newheader,
6490                      DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
6491         if (result == ISC_R_SUCCESS &&
6492             delegating_type(rbtdb, node, rdataset->type))
6493                 node->find_callback = 1;
6494         else if (result == DNS_R_UNCHANGED)
6495                 result = ISC_R_SUCCESS;
6496
6497         return (result);
6498 }
6499
6500 static isc_result_t
6501 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6502         rbtdb_load_t *loadctx;
6503         dns_rbtdb_t *rbtdb;
6504
6505         rbtdb = (dns_rbtdb_t *)db;
6506
6507         REQUIRE(VALID_RBTDB(rbtdb));
6508
6509         loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
6510         if (loadctx == NULL)
6511                 return (ISC_R_NOMEMORY);
6512
6513         loadctx->rbtdb = rbtdb;
6514         if (IS_CACHE(rbtdb))
6515                 isc_stdtime_get(&loadctx->now);
6516         else
6517                 loadctx->now = 0;
6518
6519         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6520
6521         REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
6522                 == 0);
6523         rbtdb->attributes |= RBTDB_ATTR_LOADING;
6524
6525         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6526
6527         *addp = loading_addrdataset;
6528         *dbloadp = loadctx;
6529
6530         return (ISC_R_SUCCESS);
6531 }
6532
6533 static isc_result_t
6534 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
6535         rbtdb_load_t *loadctx;
6536         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6537
6538         REQUIRE(VALID_RBTDB(rbtdb));
6539         REQUIRE(dbloadp != NULL);
6540         loadctx = *dbloadp;
6541         REQUIRE(loadctx->rbtdb == rbtdb);
6542
6543         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6544
6545         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
6546         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
6547
6548         rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
6549         rbtdb->attributes |= RBTDB_ATTR_LOADED;
6550
6551         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6552
6553         /*
6554          * If there's a KEY rdataset at the zone origin containing a
6555          * zone key, we consider the zone secure.
6556          */
6557         if (! IS_CACHE(rbtdb))
6558                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6559
6560         *dbloadp = NULL;
6561
6562         isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
6563
6564         return (ISC_R_SUCCESS);
6565 }
6566
6567 static isc_result_t
6568 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
6569      dns_masterformat_t masterformat) {
6570         dns_rbtdb_t *rbtdb;
6571
6572         rbtdb = (dns_rbtdb_t *)db;
6573
6574         REQUIRE(VALID_RBTDB(rbtdb));
6575
6576         return (dns_master_dump2(rbtdb->common.mctx, db, version,
6577                                  &dns_master_style_default,
6578                                  filename, masterformat));
6579 }
6580
6581 static void
6582 delete_callback(void *data, void *arg) {
6583         dns_rbtdb_t *rbtdb = arg;
6584         rdatasetheader_t *current, *next;
6585         unsigned int locknum;
6586
6587         current = data;
6588         locknum = current->node->locknum;
6589         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6590         while (current != NULL) {
6591                 next = current->next;
6592                 free_rdataset(rbtdb, rbtdb->common.mctx, current);
6593                 current = next;
6594         }
6595         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6596 }
6597
6598 static isc_boolean_t
6599 issecure(dns_db_t *db) {
6600         dns_rbtdb_t *rbtdb;
6601         isc_boolean_t secure;
6602
6603         rbtdb = (dns_rbtdb_t *)db;
6604
6605         REQUIRE(VALID_RBTDB(rbtdb));
6606
6607         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6608         secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
6609         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6610
6611         return (secure);
6612 }
6613
6614 static isc_boolean_t
6615 isdnssec(dns_db_t *db) {
6616         dns_rbtdb_t *rbtdb;
6617         isc_boolean_t dnssec;
6618
6619         rbtdb = (dns_rbtdb_t *)db;
6620
6621         REQUIRE(VALID_RBTDB(rbtdb));
6622
6623         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6624         dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
6625         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6626
6627         return (dnssec);
6628 }
6629
6630 static unsigned int
6631 nodecount(dns_db_t *db) {
6632         dns_rbtdb_t *rbtdb;
6633         unsigned int count;
6634
6635         rbtdb = (dns_rbtdb_t *)db;
6636
6637         REQUIRE(VALID_RBTDB(rbtdb));
6638
6639         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6640         count = dns_rbt_nodecount(rbtdb->tree);
6641         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6642
6643         return (count);
6644 }
6645
6646 static void
6647 settask(dns_db_t *db, isc_task_t *task) {
6648         dns_rbtdb_t *rbtdb;
6649
6650         rbtdb = (dns_rbtdb_t *)db;
6651
6652         REQUIRE(VALID_RBTDB(rbtdb));
6653
6654         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6655         if (rbtdb->task != NULL)
6656                 isc_task_detach(&rbtdb->task);
6657         if (task != NULL)
6658                 isc_task_attach(task, &rbtdb->task);
6659         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6660 }
6661
6662 static isc_boolean_t
6663 ispersistent(dns_db_t *db) {
6664         UNUSED(db);
6665         return (ISC_FALSE);
6666 }
6667
6668 static isc_result_t
6669 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
6670         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6671         dns_rbtnode_t *onode;
6672         isc_result_t result = ISC_R_SUCCESS;
6673
6674         REQUIRE(VALID_RBTDB(rbtdb));
6675         REQUIRE(nodep != NULL && *nodep == NULL);
6676
6677         /* Note that the access to origin_node doesn't require a DB lock */
6678         onode = (dns_rbtnode_t *)rbtdb->origin_node;
6679         if (onode != NULL) {
6680                 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
6681                 new_reference(rbtdb, onode);
6682                 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
6683
6684                 *nodep = rbtdb->origin_node;
6685         } else {
6686                 INSIST(IS_CACHE(rbtdb));
6687                 result = ISC_R_NOTFOUND;
6688         }
6689
6690         return (result);
6691 }
6692
6693 static isc_result_t
6694 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
6695                    isc_uint8_t *flags, isc_uint16_t *iterations,
6696                    unsigned char *salt, size_t *salt_length)
6697 {
6698         dns_rbtdb_t *rbtdb;
6699         isc_result_t result = ISC_R_NOTFOUND;
6700         rbtdb_version_t *rbtversion = version;
6701
6702         rbtdb = (dns_rbtdb_t *)db;
6703
6704         REQUIRE(VALID_RBTDB(rbtdb));
6705
6706         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6707
6708         if (rbtversion == NULL)
6709                 rbtversion = rbtdb->current_version;
6710
6711         if (rbtversion->havensec3) {
6712                 if (hash != NULL)
6713                         *hash = rbtversion->hash;
6714                 if (salt != NULL && salt_length != NULL) {
6715                         REQUIRE(*salt_length >= rbtversion->salt_length);
6716                         memcpy(salt, rbtversion->salt, rbtversion->salt_length);
6717                 }
6718                 if (salt_length != NULL)
6719                         *salt_length = rbtversion->salt_length;
6720                 if (iterations != NULL)
6721                         *iterations = rbtversion->iterations;
6722                 if (flags != NULL)
6723                         *flags = rbtversion->flags;
6724                 result = ISC_R_SUCCESS;
6725         }
6726         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6727
6728         return (result);
6729 }
6730
6731 static isc_result_t
6732 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
6733         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6734         isc_stdtime_t oldresign;
6735         isc_result_t result = ISC_R_SUCCESS;
6736         rdatasetheader_t *header;
6737
6738         REQUIRE(VALID_RBTDB(rbtdb));
6739         REQUIRE(!IS_CACHE(rbtdb));
6740         REQUIRE(rdataset != NULL);
6741
6742         header = rdataset->private3;
6743         header--;
6744
6745         NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6746                   isc_rwlocktype_write);
6747
6748         oldresign = header->resign;
6749         header->resign = resign;
6750         if (header->heap_index != 0) {
6751                 INSIST(RESIGN(header));
6752                 if (resign == 0) {
6753                         isc_heap_delete(rbtdb->heaps[header->node->locknum],
6754                                         header->heap_index);
6755                         header->heap_index = 0;
6756                 } else if (resign < oldresign)
6757                         isc_heap_increased(rbtdb->heaps[header->node->locknum],
6758                                            header->heap_index);
6759                 else
6760                         isc_heap_decreased(rbtdb->heaps[header->node->locknum],
6761                                            header->heap_index);
6762         } else if (resign && header->heap_index == 0) {
6763                 header->attributes |= RDATASET_ATTR_RESIGN;
6764                 result = resign_insert(rbtdb, header->node->locknum, header);
6765         }
6766         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6767                     isc_rwlocktype_write);
6768         return (result);
6769 }
6770
6771 static isc_result_t
6772 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
6773                dns_name_t *foundname)
6774 {
6775         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6776         rdatasetheader_t *header = NULL, *this;
6777         unsigned int i;
6778         isc_result_t result = ISC_R_NOTFOUND;
6779         unsigned int locknum;
6780
6781         REQUIRE(VALID_RBTDB(rbtdb));
6782
6783         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
6784
6785         for (i = 0; i < rbtdb->node_lock_count; i++) {
6786                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
6787                 this = isc_heap_element(rbtdb->heaps[i], 1);
6788                 if (this == NULL) {
6789                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6790                                     isc_rwlocktype_read);
6791                         continue;
6792                 }
6793                 if (header == NULL)
6794                         header = this;
6795                 else if (isc_serial_lt(this->resign, header->resign)) {
6796                         locknum = header->node->locknum;
6797                         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
6798                                     isc_rwlocktype_read);
6799                         header = this;
6800                 } else
6801                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6802                                     isc_rwlocktype_read);
6803         }
6804
6805         if (header == NULL)
6806                 goto unlock;
6807
6808         bind_rdataset(rbtdb, header->node, header, 0, rdataset);
6809
6810         if (foundname != NULL)
6811                 dns_rbt_fullnamefromnode(header->node, foundname);
6812
6813         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6814                     isc_rwlocktype_read);
6815
6816         result = ISC_R_SUCCESS;
6817
6818  unlock:
6819         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
6820
6821         return (result);
6822 }
6823
6824 static void
6825 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
6826 {
6827         rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
6828         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6829         dns_rbtnode_t *node;
6830         rdatasetheader_t *header;
6831
6832         REQUIRE(VALID_RBTDB(rbtdb));
6833         REQUIRE(rdataset != NULL);
6834         REQUIRE(rbtdb->future_version == rbtversion);
6835         REQUIRE(rbtversion->writer);
6836
6837         node = rdataset->private2;
6838         header = rdataset->private3;
6839         header--;
6840
6841         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6842         NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
6843                   isc_rwlocktype_write);
6844         /*
6845          * Delete from heap and save to re-signed list so that it can
6846          * be restored if we backout of this change.
6847          */
6848         new_reference(rbtdb, node);
6849         isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
6850         header->heap_index = 0;
6851         ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
6852
6853         NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
6854                     isc_rwlocktype_write);
6855         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6856 }
6857
6858 static dns_stats_t *
6859 getrrsetstats(dns_db_t *db) {
6860         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6861
6862         REQUIRE(VALID_RBTDB(rbtdb));
6863         REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
6864
6865         return (rbtdb->rrsetstats);
6866 }
6867
6868 static dns_dbmethods_t zone_methods = {
6869         attach,
6870         detach,
6871         beginload,
6872         endload,
6873         dump,
6874         currentversion,
6875         newversion,
6876         attachversion,
6877         closeversion,
6878         findnode,
6879         zone_find,
6880         zone_findzonecut,
6881         attachnode,
6882         detachnode,
6883         expirenode,
6884         printnode,
6885         createiterator,
6886         zone_findrdataset,
6887         allrdatasets,
6888         addrdataset,
6889         subtractrdataset,
6890         deleterdataset,
6891         issecure,
6892         nodecount,
6893         ispersistent,
6894         overmem,
6895         settask,
6896         getoriginnode,
6897         NULL,
6898         getnsec3parameters,
6899         findnsec3node,
6900         setsigningtime,
6901         getsigningtime,
6902         resigned,
6903         isdnssec,
6904         NULL
6905 };
6906
6907 static dns_dbmethods_t cache_methods = {
6908         attach,
6909         detach,
6910         beginload,
6911         endload,
6912         dump,
6913         currentversion,
6914         newversion,
6915         attachversion,
6916         closeversion,
6917         findnode,
6918         cache_find,
6919         cache_findzonecut,
6920         attachnode,
6921         detachnode,
6922         expirenode,
6923         printnode,
6924         createiterator,
6925         cache_findrdataset,
6926         allrdatasets,
6927         addrdataset,
6928         subtractrdataset,
6929         deleterdataset,
6930         issecure,
6931         nodecount,
6932         ispersistent,
6933         overmem,
6934         settask,
6935         getoriginnode,
6936         NULL,
6937         NULL,
6938         NULL,
6939         NULL,
6940         NULL,
6941         NULL,
6942         isdnssec,
6943         getrrsetstats
6944 };
6945
6946 isc_result_t
6947 #ifdef DNS_RBTDB_VERSION64
6948 dns_rbtdb64_create
6949 #else
6950 dns_rbtdb_create
6951 #endif
6952                 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
6953                  dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
6954                  void *driverarg, dns_db_t **dbp)
6955 {
6956         dns_rbtdb_t *rbtdb;
6957         isc_result_t result;
6958         int i;
6959         dns_name_t name;
6960         isc_boolean_t (*sooner)(void *, void *);
6961
6962         /* Keep the compiler happy. */
6963         UNUSED(argc);
6964         UNUSED(argv);
6965         UNUSED(driverarg);
6966
6967         rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
6968         if (rbtdb == NULL)
6969                 return (ISC_R_NOMEMORY);
6970
6971         memset(rbtdb, '\0', sizeof(*rbtdb));
6972         dns_name_init(&rbtdb->common.origin, NULL);
6973         rbtdb->common.attributes = 0;
6974         if (type == dns_dbtype_cache) {
6975                 rbtdb->common.methods = &cache_methods;
6976                 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
6977         } else if (type == dns_dbtype_stub) {
6978                 rbtdb->common.methods = &zone_methods;
6979                 rbtdb->common.attributes |= DNS_DBATTR_STUB;
6980         } else
6981                 rbtdb->common.methods = &zone_methods;
6982         rbtdb->common.rdclass = rdclass;
6983         rbtdb->common.mctx = NULL;
6984
6985         result = RBTDB_INITLOCK(&rbtdb->lock);
6986         if (result != ISC_R_SUCCESS)
6987                 goto cleanup_rbtdb;
6988
6989         result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
6990         if (result != ISC_R_SUCCESS)
6991                 goto cleanup_lock;
6992
6993         /*
6994          * Initialize node_lock_count in a generic way to support future
6995          * extension which allows the user to specify this value on creation.
6996          * Note that when specified for a cache DB it must be larger than 1
6997          * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
6998          */
6999         if (rbtdb->node_lock_count == 0) {
7000                 if (IS_CACHE(rbtdb))
7001                         rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
7002                 else
7003                         rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
7004         } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
7005                 result = ISC_R_RANGE;
7006                 goto cleanup_tree_lock;
7007         }
7008         INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
7009         rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
7010                                         sizeof(rbtdb_nodelock_t));
7011         if (rbtdb->node_locks == NULL) {
7012                 result = ISC_R_NOMEMORY;
7013                 goto cleanup_tree_lock;
7014         }
7015
7016         rbtdb->rrsetstats = NULL;
7017         if (IS_CACHE(rbtdb)) {
7018                 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
7019                 if (result != ISC_R_SUCCESS)
7020                         goto cleanup_node_locks;
7021                 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
7022                                                sizeof(rdatasetheaderlist_t));
7023                 if (rbtdb->rdatasets == NULL) {
7024                         result = ISC_R_NOMEMORY;
7025                         goto cleanup_rrsetstats;
7026                 }
7027                 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7028                         ISC_LIST_INIT(rbtdb->rdatasets[i]);
7029         } else
7030                 rbtdb->rdatasets = NULL;
7031
7032         /*
7033          * Create the heaps.
7034          */
7035         rbtdb->heaps = isc_mem_get(mctx, rbtdb->node_lock_count *
7036                                    sizeof(isc_heap_t *));
7037         if (rbtdb->heaps == NULL) {
7038                 result = ISC_R_NOMEMORY;
7039                 goto cleanup_rdatasets;
7040         }
7041         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7042                 rbtdb->heaps[i] = NULL;
7043         sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
7044         for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
7045                 result = isc_heap_create(mctx, sooner, set_index, 0,
7046                                          &rbtdb->heaps[i]);
7047                 if (result != ISC_R_SUCCESS)
7048                         goto cleanup_heaps;
7049         }
7050
7051         /*
7052          * Create deadnode lists.
7053          */
7054         rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
7055                                        sizeof(rbtnodelist_t));
7056         if (rbtdb->deadnodes == NULL) {
7057                 result = ISC_R_NOMEMORY;
7058                 goto cleanup_heaps;
7059         }
7060         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7061                 ISC_LIST_INIT(rbtdb->deadnodes[i]);
7062
7063         rbtdb->active = rbtdb->node_lock_count;
7064
7065         for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
7066                 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
7067                 if (result == ISC_R_SUCCESS) {
7068                         result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
7069                         if (result != ISC_R_SUCCESS)
7070                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7071                 }
7072                 if (result != ISC_R_SUCCESS) {
7073                         while (i-- > 0) {
7074                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7075                                 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7076                                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7077                         }
7078                         goto cleanup_deadnodes;
7079                 }
7080                 rbtdb->node_locks[i].exiting = ISC_FALSE;
7081         }
7082
7083         /*
7084          * Attach to the mctx.  The database will persist so long as there
7085          * are references to it, and attaching to the mctx ensures that our
7086          * mctx won't disappear out from under us.
7087          */
7088         isc_mem_attach(mctx, &rbtdb->common.mctx);
7089
7090         /*
7091          * Must be initialized before free_rbtdb() is called.
7092          */
7093         isc_ondestroy_init(&rbtdb->common.ondest);
7094
7095         /*
7096          * Make a copy of the origin name.
7097          */
7098         result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7099         if (result != ISC_R_SUCCESS) {
7100                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7101                 return (result);
7102         }
7103
7104         /*
7105          * Make the Red-Black Trees.
7106          */
7107         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7108         if (result != ISC_R_SUCCESS) {
7109                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7110                 return (result);
7111         }
7112
7113         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7114         if (result != ISC_R_SUCCESS) {
7115                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7116                 return (result);
7117         }
7118
7119         /*
7120          * In order to set the node callback bit correctly in zone databases,
7121          * we need to know if the node has the origin name of the zone.
7122          * In loading_addrdataset() we could simply compare the new name
7123          * to the origin name, but this is expensive.  Also, we don't know the
7124          * node name in addrdataset(), so we need another way of knowing the
7125          * zone's top.
7126          *
7127          * We now explicitly create a node for the zone's origin, and then
7128          * we simply remember the node's address.  This is safe, because
7129          * the top-of-zone node can never be deleted, nor can its address
7130          * change.
7131          */
7132         if (!IS_CACHE(rbtdb)) {
7133                 dns_rbtnode_t *nsec3node;
7134
7135                 rbtdb->origin_node = NULL;
7136                 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7137                                          &rbtdb->origin_node);
7138                 if (result != ISC_R_SUCCESS) {
7139                         INSIST(result != ISC_R_EXISTS);
7140                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7141                         return (result);
7142                 }
7143                 rbtdb->origin_node->nsec3 = 0;
7144                 /*
7145                  * We need to give the origin node the right locknum.
7146                  */
7147                 dns_name_init(&name, NULL);
7148                 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7149 #ifdef DNS_RBT_USEHASH
7150                 rbtdb->origin_node->locknum =
7151                         rbtdb->origin_node->hashval %
7152                         rbtdb->node_lock_count;
7153 #else
7154                 rbtdb->origin_node->locknum =
7155                         dns_name_hash(&name, ISC_TRUE) %
7156                         rbtdb->node_lock_count;
7157 #endif
7158                 /*
7159                  * Add an apex node to the NSEC3 tree so that NSEC3 searches
7160                  * return partial matches when there is only a single NSEC3
7161                  * record in the tree.
7162                  */
7163                 nsec3node = NULL;
7164                 result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin,
7165                                          &nsec3node);
7166                 if (result != ISC_R_SUCCESS) {
7167                         INSIST(result != ISC_R_EXISTS);
7168                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7169                         return (result);
7170                 }
7171                 nsec3node->nsec3 = 1;
7172                 /*
7173                  * We need to give the nsec3 origin node the right locknum.
7174                  */
7175                 dns_name_init(&name, NULL);
7176                 dns_rbt_namefromnode(nsec3node, &name);
7177 #ifdef DNS_RBT_USEHASH
7178                 nsec3node->locknum = nsec3node->hashval %
7179                         rbtdb->node_lock_count;
7180 #else
7181                 nsec3node->locknum = dns_name_hash(&name, ISC_TRUE) %
7182                         rbtdb->node_lock_count;
7183 #endif
7184         }
7185
7186         /*
7187          * Misc. Initialization.
7188          */
7189         result = isc_refcount_init(&rbtdb->references, 1);
7190         if (result != ISC_R_SUCCESS) {
7191                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7192                 return (result);
7193         }
7194         rbtdb->attributes = 0;
7195         rbtdb->task = NULL;
7196
7197         /*
7198          * Version Initialization.
7199          */
7200         rbtdb->current_serial = 1;
7201         rbtdb->least_serial = 1;
7202         rbtdb->next_serial = 2;
7203         rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7204         if (rbtdb->current_version == NULL) {
7205                 isc_refcount_decrement(&rbtdb->references, NULL);
7206                 isc_refcount_destroy(&rbtdb->references);
7207                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7208                 return (ISC_R_NOMEMORY);
7209         }
7210         rbtdb->current_version->secure = dns_db_insecure;
7211         rbtdb->current_version->havensec3 = ISC_FALSE;
7212         rbtdb->current_version->flags = 0;
7213         rbtdb->current_version->iterations = 0;
7214         rbtdb->current_version->hash = 0;
7215         rbtdb->current_version->salt_length = 0;
7216         memset(rbtdb->current_version->salt, 0,
7217                sizeof(rbtdb->current_version->salt));
7218         rbtdb->future_version = NULL;
7219         ISC_LIST_INIT(rbtdb->open_versions);
7220         /*
7221          * Keep the current version in the open list so that list operation
7222          * won't happen in normal lookup operations.
7223          */
7224         PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7225
7226         rbtdb->common.magic = DNS_DB_MAGIC;
7227         rbtdb->common.impmagic = RBTDB_MAGIC;
7228
7229         *dbp = (dns_db_t *)rbtdb;
7230
7231         return (ISC_R_SUCCESS);
7232
7233  cleanup_deadnodes:
7234         isc_mem_put(mctx, rbtdb->deadnodes,
7235                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7236
7237  cleanup_heaps:
7238         if (rbtdb->heaps != NULL) {
7239                 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7240                         if (rbtdb->heaps[i] != NULL)
7241                                 isc_heap_destroy(&rbtdb->heaps[i]);
7242                 isc_mem_put(mctx, rbtdb->heaps,
7243                             rbtdb->node_lock_count * sizeof(isc_heap_t *));
7244         }
7245
7246  cleanup_rdatasets:
7247         if (rbtdb->rdatasets != NULL)
7248                 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7249                             sizeof(rdatasetheaderlist_t));
7250  cleanup_rrsetstats:
7251         if (rbtdb->rrsetstats != NULL)
7252                 dns_stats_detach(&rbtdb->rrsetstats);
7253
7254  cleanup_node_locks:
7255         isc_mem_put(mctx, rbtdb->node_locks,
7256                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7257
7258  cleanup_tree_lock:
7259         isc_rwlock_destroy(&rbtdb->tree_lock);
7260
7261  cleanup_lock:
7262         RBTDB_DESTROYLOCK(&rbtdb->lock);
7263
7264  cleanup_rbtdb:
7265         isc_mem_put(mctx, rbtdb,  sizeof(*rbtdb));
7266         return (result);
7267 }
7268
7269
7270 /*
7271  * Slabbed Rdataset Methods
7272  */
7273
7274 static void
7275 rdataset_disassociate(dns_rdataset_t *rdataset) {
7276         dns_db_t *db = rdataset->private1;
7277         dns_dbnode_t *node = rdataset->private2;
7278
7279         detachnode(db, &node);
7280 }
7281
7282 static isc_result_t
7283 rdataset_first(dns_rdataset_t *rdataset) {
7284         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7285         unsigned int count;
7286
7287         count = raw[0] * 256 + raw[1];
7288         if (count == 0) {
7289                 rdataset->private5 = NULL;
7290                 return (ISC_R_NOMORE);
7291         }
7292
7293 #if DNS_RDATASET_FIXED
7294         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7295                 raw += 2 + (4 * count);
7296         else
7297 #endif
7298                 raw += 2;
7299
7300         /*
7301          * The privateuint4 field is the number of rdata beyond the
7302          * cursor position, so we decrement the total count by one
7303          * before storing it.
7304          *
7305          * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7306          * first record.  If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7307          * to the first entry in the offset table.
7308          */
7309         count--;
7310         rdataset->privateuint4 = count;
7311         rdataset->private5 = raw;
7312
7313         return (ISC_R_SUCCESS);
7314 }
7315
7316 static isc_result_t
7317 rdataset_next(dns_rdataset_t *rdataset) {
7318         unsigned int count;
7319         unsigned int length;
7320         unsigned char *raw;     /* RDATASLAB */
7321
7322         count = rdataset->privateuint4;
7323         if (count == 0)
7324                 return (ISC_R_NOMORE);
7325         count--;
7326         rdataset->privateuint4 = count;
7327
7328         /*
7329          * Skip forward one record (length + 4) or one offset (4).
7330          */
7331         raw = rdataset->private5;
7332 #if DNS_RDATASET_FIXED
7333         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7334 #endif
7335                 length = raw[0] * 256 + raw[1];
7336                 raw += length;
7337 #if DNS_RDATASET_FIXED
7338         }
7339         rdataset->private5 = raw + 4;           /* length(2) + order(2) */
7340 #else
7341         rdataset->private5 = raw + 2;           /* length(2) */
7342 #endif
7343
7344         return (ISC_R_SUCCESS);
7345 }
7346
7347 static void
7348 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7349         unsigned char *raw = rdataset->private5;        /* RDATASLAB */
7350 #if DNS_RDATASET_FIXED
7351         unsigned int offset;
7352 #endif
7353         unsigned int length;
7354         isc_region_t r;
7355         unsigned int flags = 0;
7356
7357         REQUIRE(raw != NULL);
7358
7359         /*
7360          * Find the start of the record if not already in private5
7361          * then skip the length and order fields.
7362          */
7363 #if DNS_RDATASET_FIXED
7364         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7365                 offset = (raw[0] << 24) + (raw[1] << 16) +
7366                          (raw[2] << 8) + raw[3];
7367                 raw = rdataset->private3;
7368                 raw += offset;
7369         }
7370 #endif
7371         length = raw[0] * 256 + raw[1];
7372 #if DNS_RDATASET_FIXED
7373         raw += 4;
7374 #else
7375         raw += 2;
7376 #endif
7377         if (rdataset->type == dns_rdatatype_rrsig) {
7378                 if (*raw & DNS_RDATASLAB_OFFLINE)
7379                         flags |= DNS_RDATA_OFFLINE;
7380                 length--;
7381                 raw++;
7382         }
7383         r.length = length;
7384         r.base = raw;
7385         dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7386         rdata->flags |= flags;
7387 }
7388
7389 static void
7390 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7391         dns_db_t *db = source->private1;
7392         dns_dbnode_t *node = source->private2;
7393         dns_dbnode_t *cloned_node = NULL;
7394
7395         attachnode(db, node, &cloned_node);
7396         *target = *source;
7397
7398         /*
7399          * Reset iterator state.
7400          */
7401         target->privateuint4 = 0;
7402         target->private5 = NULL;
7403 }
7404
7405 static unsigned int
7406 rdataset_count(dns_rdataset_t *rdataset) {
7407         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7408         unsigned int count;
7409
7410         count = raw[0] * 256 + raw[1];
7411
7412         return (count);
7413 }
7414
7415 static isc_result_t
7416 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7417                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7418 {
7419         dns_db_t *db = rdataset->private1;
7420         dns_dbnode_t *node = rdataset->private2;
7421         dns_dbnode_t *cloned_node;
7422         struct noqname *noqname = rdataset->private6;
7423
7424         cloned_node = NULL;
7425         attachnode(db, node, &cloned_node);
7426         nsec->methods = &rdataset_methods;
7427         nsec->rdclass = db->rdclass;
7428         nsec->type = noqname->type;
7429         nsec->covers = 0;
7430         nsec->ttl = rdataset->ttl;
7431         nsec->trust = rdataset->trust;
7432         nsec->private1 = rdataset->private1;
7433         nsec->private2 = rdataset->private2;
7434         nsec->private3 = noqname->neg;
7435         nsec->privateuint4 = 0;
7436         nsec->private5 = NULL;
7437         nsec->private6 = NULL;
7438         nsec->private7 = NULL;
7439
7440         cloned_node = NULL;
7441         attachnode(db, node, &cloned_node);
7442         nsecsig->methods = &rdataset_methods;
7443         nsecsig->rdclass = db->rdclass;
7444         nsecsig->type = dns_rdatatype_rrsig;
7445         nsecsig->covers = noqname->type;
7446         nsecsig->ttl = rdataset->ttl;
7447         nsecsig->trust = rdataset->trust;
7448         nsecsig->private1 = rdataset->private1;
7449         nsecsig->private2 = rdataset->private2;
7450         nsecsig->private3 = noqname->negsig;
7451         nsecsig->privateuint4 = 0;
7452         nsecsig->private5 = NULL;
7453         nsec->private6 = NULL;
7454         nsec->private7 = NULL;
7455
7456         dns_name_clone(&noqname->name, name);
7457
7458         return (ISC_R_SUCCESS);
7459 }
7460
7461 static isc_result_t
7462 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
7463                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7464 {
7465         dns_db_t *db = rdataset->private1;
7466         dns_dbnode_t *node = rdataset->private2;
7467         dns_dbnode_t *cloned_node;
7468         struct noqname *closest = rdataset->private7;
7469
7470         cloned_node = NULL;
7471         attachnode(db, node, &cloned_node);
7472         nsec->methods = &rdataset_methods;
7473         nsec->rdclass = db->rdclass;
7474         nsec->type = closest->type;
7475         nsec->covers = 0;
7476         nsec->ttl = rdataset->ttl;
7477         nsec->trust = rdataset->trust;
7478         nsec->private1 = rdataset->private1;
7479         nsec->private2 = rdataset->private2;
7480         nsec->private3 = closest->neg;
7481         nsec->privateuint4 = 0;
7482         nsec->private5 = NULL;
7483         nsec->private6 = NULL;
7484         nsec->private7 = NULL;
7485
7486         cloned_node = NULL;
7487         attachnode(db, node, &cloned_node);
7488         nsecsig->methods = &rdataset_methods;
7489         nsecsig->rdclass = db->rdclass;
7490         nsecsig->type = dns_rdatatype_rrsig;
7491         nsecsig->covers = closest->type;
7492         nsecsig->ttl = rdataset->ttl;
7493         nsecsig->trust = rdataset->trust;
7494         nsecsig->private1 = rdataset->private1;
7495         nsecsig->private2 = rdataset->private2;
7496         nsecsig->private3 = closest->negsig;
7497         nsecsig->privateuint4 = 0;
7498         nsecsig->private5 = NULL;
7499         nsec->private6 = NULL;
7500         nsec->private7 = NULL;
7501
7502         dns_name_clone(&closest->name, name);
7503
7504         return (ISC_R_SUCCESS);
7505 }
7506
7507 static void
7508 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
7509         dns_rbtdb_t *rbtdb = rdataset->private1;
7510         dns_rbtnode_t *rbtnode = rdataset->private2;
7511         rdatasetheader_t *header = rdataset->private3;
7512
7513         header--;
7514         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7515                   isc_rwlocktype_write);
7516         header->trust = rdataset->trust = trust;
7517         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7518                   isc_rwlocktype_write);
7519 }
7520
7521 static void
7522 rdataset_expire(dns_rdataset_t *rdataset) {
7523         dns_rbtdb_t *rbtdb = rdataset->private1;
7524         dns_rbtnode_t *rbtnode = rdataset->private2;
7525         rdatasetheader_t *header = rdataset->private3;
7526
7527         header--;
7528         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7529                   isc_rwlocktype_write);
7530         expire_header(rbtdb, header, ISC_FALSE);
7531         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7532                   isc_rwlocktype_write);
7533 }
7534
7535 /*
7536  * Rdataset Iterator Methods
7537  */
7538
7539 static void
7540 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
7541         rbtdb_rdatasetiter_t *rbtiterator;
7542
7543         rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
7544
7545         if (rbtiterator->common.version != NULL)
7546                 closeversion(rbtiterator->common.db,
7547                              &rbtiterator->common.version, ISC_FALSE);
7548         detachnode(rbtiterator->common.db, &rbtiterator->common.node);
7549         isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
7550                     sizeof(*rbtiterator));
7551
7552         *iteratorp = NULL;
7553 }
7554
7555 static isc_result_t
7556 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
7557         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7558         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7559         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7560         rbtdb_version_t *rbtversion = rbtiterator->common.version;
7561         rdatasetheader_t *header, *top_next;
7562         rbtdb_serial_t serial;
7563         isc_stdtime_t now;
7564
7565         if (IS_CACHE(rbtdb)) {
7566                 serial = 1;
7567                 now = rbtiterator->common.now;
7568         } else {
7569                 serial = rbtversion->serial;
7570                 now = 0;
7571         }
7572
7573         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7574                   isc_rwlocktype_read);
7575
7576         for (header = rbtnode->data; header != NULL; header = top_next) {
7577                 top_next = header->next;
7578                 do {
7579                         if (header->serial <= serial && !IGNORE(header)) {
7580                                 /*
7581                                  * Is this a "this rdataset doesn't exist"
7582                                  * record?  Or is it too old in the cache?
7583                                  *
7584                                  * Note: unlike everywhere else, we
7585                                  * check for now > header->rdh_ttl instead
7586                                  * of now >= header->rdh_ttl.  This allows
7587                                  * ANY and RRSIG queries for 0 TTL
7588                                  * rdatasets to work.
7589                                  */
7590                                 if (NONEXISTENT(header) ||
7591                                     (now != 0 && now > header->rdh_ttl))
7592                                         header = NULL;
7593                                 break;
7594                         } else
7595                                 header = header->down;
7596                 } while (header != NULL);
7597                 if (header != NULL)
7598                         break;
7599         }
7600
7601         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7602                     isc_rwlocktype_read);
7603
7604         rbtiterator->current = header;
7605
7606         if (header == NULL)
7607                 return (ISC_R_NOMORE);
7608
7609         return (ISC_R_SUCCESS);
7610 }
7611
7612 static isc_result_t
7613 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
7614         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7615         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7616         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7617         rbtdb_version_t *rbtversion = rbtiterator->common.version;
7618         rdatasetheader_t *header, *top_next;
7619         rbtdb_serial_t serial;
7620         isc_stdtime_t now;
7621         rbtdb_rdatatype_t type, negtype;
7622         dns_rdatatype_t rdtype, covers;
7623
7624         header = rbtiterator->current;
7625         if (header == NULL)
7626                 return (ISC_R_NOMORE);
7627
7628         if (IS_CACHE(rbtdb)) {
7629                 serial = 1;
7630                 now = rbtiterator->common.now;
7631         } else {
7632                 serial = rbtversion->serial;
7633                 now = 0;
7634         }
7635
7636         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7637                   isc_rwlocktype_read);
7638
7639         type = header->type;
7640         rdtype = RBTDB_RDATATYPE_BASE(header->type);
7641         if (rdtype == 0) {
7642                 covers = RBTDB_RDATATYPE_EXT(header->type);
7643                 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
7644         } else
7645                 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
7646         for (header = header->next; header != NULL; header = top_next) {
7647                 top_next = header->next;
7648                 /*
7649                  * If not walking back up the down list.
7650                  */
7651                 if (header->type != type && header->type != negtype) {
7652                         do {
7653                                 if (header->serial <= serial &&
7654                                     !IGNORE(header)) {
7655                                         /*
7656                                          * Is this a "this rdataset doesn't
7657                                          * exist" record?
7658                                          *
7659                                          * Note: unlike everywhere else, we
7660                                          * check for now > header->ttl instead
7661                                          * of now >= header->ttl.  This allows
7662                                          * ANY and RRSIG queries for 0 TTL
7663                                          * rdatasets to work.
7664                                          */
7665                                         if ((header->attributes &
7666                                              RDATASET_ATTR_NONEXISTENT) != 0 ||
7667                                             (now != 0 && now > header->rdh_ttl))
7668                                                 header = NULL;
7669                                         break;
7670                                 } else
7671                                         header = header->down;
7672                         } while (header != NULL);
7673                         if (header != NULL)
7674                                 break;
7675                 }
7676         }
7677
7678         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7679                     isc_rwlocktype_read);
7680
7681         rbtiterator->current = header;
7682
7683         if (header == NULL)
7684                 return (ISC_R_NOMORE);
7685
7686         return (ISC_R_SUCCESS);
7687 }
7688
7689 static void
7690 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
7691         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7692         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7693         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7694         rdatasetheader_t *header;
7695
7696         header = rbtiterator->current;
7697         REQUIRE(header != NULL);
7698
7699         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7700                   isc_rwlocktype_read);
7701
7702         bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
7703                       rdataset);
7704
7705         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7706                     isc_rwlocktype_read);
7707 }
7708
7709
7710 /*
7711  * Database Iterator Methods
7712  */
7713
7714 static inline void
7715 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7716         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7717         dns_rbtnode_t *node = rbtdbiter->node;
7718
7719         if (node == NULL)
7720                 return;
7721
7722         INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
7723         reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
7724 }
7725
7726 static inline void
7727 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7728         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7729         dns_rbtnode_t *node = rbtdbiter->node;
7730         nodelock_t *lock;
7731
7732         if (node == NULL)
7733                 return;
7734
7735         lock = &rbtdb->node_locks[node->locknum].lock;
7736         NODE_LOCK(lock, isc_rwlocktype_read);
7737         decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
7738                             rbtdbiter->tree_locked, ISC_FALSE);
7739         NODE_UNLOCK(lock, isc_rwlocktype_read);
7740
7741         rbtdbiter->node = NULL;
7742 }
7743
7744 static void
7745 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
7746         dns_rbtnode_t *node;
7747         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7748         isc_boolean_t was_read_locked = ISC_FALSE;
7749         nodelock_t *lock;
7750         int i;
7751
7752         if (rbtdbiter->delete != 0) {
7753                 /*
7754                  * Note that "%d node of %d in tree" can report things like
7755                  * "flush_deletions: 59 nodes of 41 in tree".  This means
7756                  * That some nodes appear on the deletions list more than
7757                  * once.  Only the last occurence will actually be deleted.
7758                  */
7759                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
7760                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
7761                               "flush_deletions: %d nodes of %d in tree",
7762                               rbtdbiter->delete,
7763                               dns_rbt_nodecount(rbtdb->tree));
7764
7765                 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7766                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7767                         was_read_locked = ISC_TRUE;
7768                 }
7769                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7770                 rbtdbiter->tree_locked = isc_rwlocktype_write;
7771
7772                 for (i = 0; i < rbtdbiter->delete; i++) {
7773                         node = rbtdbiter->deletions[i];
7774                         lock = &rbtdb->node_locks[node->locknum].lock;
7775
7776                         NODE_LOCK(lock, isc_rwlocktype_read);
7777                         decrement_reference(rbtdb, node, 0,
7778                                             isc_rwlocktype_read,
7779                                             rbtdbiter->tree_locked, ISC_FALSE);
7780                         NODE_UNLOCK(lock, isc_rwlocktype_read);
7781                 }
7782
7783                 rbtdbiter->delete = 0;
7784
7785                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7786                 if (was_read_locked) {
7787                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7788                         rbtdbiter->tree_locked = isc_rwlocktype_read;
7789
7790                 } else {
7791                         rbtdbiter->tree_locked = isc_rwlocktype_none;
7792                 }
7793         }
7794 }
7795
7796 static inline void
7797 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
7798         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7799
7800         REQUIRE(rbtdbiter->paused);
7801         REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
7802
7803         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7804         rbtdbiter->tree_locked = isc_rwlocktype_read;
7805
7806         rbtdbiter->paused = ISC_FALSE;
7807 }
7808
7809 static void
7810 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
7811         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
7812         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7813         dns_db_t *db = NULL;
7814
7815         if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7816                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7817                 rbtdbiter->tree_locked = isc_rwlocktype_none;
7818         } else
7819                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
7820
7821         dereference_iter_node(rbtdbiter);
7822
7823         flush_deletions(rbtdbiter);
7824
7825         dns_db_attach(rbtdbiter->common.db, &db);
7826         dns_db_detach(&rbtdbiter->common.db);
7827
7828         dns_rbtnodechain_reset(&rbtdbiter->chain);
7829         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7830         isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
7831         dns_db_detach(&db);
7832
7833         *iteratorp = NULL;
7834 }
7835
7836 static isc_result_t
7837 dbiterator_first(dns_dbiterator_t *iterator) {
7838         isc_result_t result;
7839         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7840         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7841         dns_name_t *name, *origin;
7842
7843         if (rbtdbiter->result != ISC_R_SUCCESS &&
7844             rbtdbiter->result != ISC_R_NOMORE)
7845                 return (rbtdbiter->result);
7846
7847         if (rbtdbiter->paused)
7848                 resume_iteration(rbtdbiter);
7849
7850         dereference_iter_node(rbtdbiter);
7851
7852         name = dns_fixedname_name(&rbtdbiter->name);
7853         origin = dns_fixedname_name(&rbtdbiter->origin);
7854         dns_rbtnodechain_reset(&rbtdbiter->chain);
7855         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7856
7857         if (rbtdbiter->nsec3only) {
7858                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7859                 result = dns_rbtnodechain_first(rbtdbiter->current,
7860                                                 rbtdb->nsec3, name, origin);
7861         } else {
7862                 rbtdbiter->current = &rbtdbiter->chain;
7863                 result = dns_rbtnodechain_first(rbtdbiter->current,
7864                                                 rbtdb->tree, name, origin);
7865                 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
7866                         rbtdbiter->current = &rbtdbiter->nsec3chain;
7867                         result = dns_rbtnodechain_first(rbtdbiter->current,
7868                                                         rbtdb->nsec3, name,
7869                                                         origin);
7870                 }
7871         }
7872         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7873                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7874                                                   NULL, &rbtdbiter->node);
7875                 if (result == ISC_R_SUCCESS) {
7876                         rbtdbiter->new_origin = ISC_TRUE;
7877                         reference_iter_node(rbtdbiter);
7878                 }
7879         } else {
7880                 INSIST(result == ISC_R_NOTFOUND);
7881                 result = ISC_R_NOMORE; /* The tree is empty. */
7882         }
7883
7884         rbtdbiter->result = result;
7885
7886         return (result);
7887 }
7888
7889 static isc_result_t
7890 dbiterator_last(dns_dbiterator_t *iterator) {
7891         isc_result_t result;
7892         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7893         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7894         dns_name_t *name, *origin;
7895
7896         if (rbtdbiter->result != ISC_R_SUCCESS &&
7897             rbtdbiter->result != ISC_R_NOMORE)
7898                 return (rbtdbiter->result);
7899
7900         if (rbtdbiter->paused)
7901                 resume_iteration(rbtdbiter);
7902
7903         dereference_iter_node(rbtdbiter);
7904
7905         name = dns_fixedname_name(&rbtdbiter->name);
7906         origin = dns_fixedname_name(&rbtdbiter->origin);
7907         dns_rbtnodechain_reset(&rbtdbiter->chain);
7908         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7909
7910         result = ISC_R_NOTFOUND;
7911         if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
7912                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7913                 result = dns_rbtnodechain_last(rbtdbiter->current,
7914                                                rbtdb->nsec3, name, origin);
7915         }
7916         if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
7917                 rbtdbiter->current = &rbtdbiter->chain;
7918                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7919                                                name, origin);
7920         }
7921         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7922                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7923                                                   NULL, &rbtdbiter->node);
7924                 if (result == ISC_R_SUCCESS) {
7925                         rbtdbiter->new_origin = ISC_TRUE;
7926                         reference_iter_node(rbtdbiter);
7927                 }
7928         } else {
7929                 INSIST(result == ISC_R_NOTFOUND);
7930                 result = ISC_R_NOMORE; /* The tree is empty. */
7931         }
7932
7933         rbtdbiter->result = result;
7934
7935         return (result);
7936 }
7937
7938 static isc_result_t
7939 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
7940         isc_result_t result;
7941         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7942         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7943         dns_name_t *iname, *origin;
7944
7945         if (rbtdbiter->result != ISC_R_SUCCESS &&
7946             rbtdbiter->result != ISC_R_NOTFOUND &&
7947             rbtdbiter->result != ISC_R_NOMORE)
7948                 return (rbtdbiter->result);
7949
7950         if (rbtdbiter->paused)
7951                 resume_iteration(rbtdbiter);
7952
7953         dereference_iter_node(rbtdbiter);
7954
7955         iname = dns_fixedname_name(&rbtdbiter->name);
7956         origin = dns_fixedname_name(&rbtdbiter->origin);
7957         dns_rbtnodechain_reset(&rbtdbiter->chain);
7958         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7959
7960         if (rbtdbiter->nsec3only) {
7961                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7962                 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7963                                           &rbtdbiter->node,
7964                                           rbtdbiter->current,
7965                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7966         } else if (rbtdbiter->nonsec3) {
7967                 rbtdbiter->current = &rbtdbiter->chain;
7968                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7969                                           &rbtdbiter->node,
7970                                           rbtdbiter->current,
7971                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7972         } else {
7973                 /*
7974                  * Stay on main chain if not found on either chain.
7975                  */
7976                 rbtdbiter->current = &rbtdbiter->chain;
7977                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7978                                           &rbtdbiter->node,
7979                                           rbtdbiter->current,
7980                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7981                 if (result == DNS_R_PARTIALMATCH) {
7982                         dns_rbtnode_t *node = NULL;
7983                         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7984                                                   &node, &rbtdbiter->nsec3chain,
7985                                                   DNS_RBTFIND_EMPTYDATA,
7986                                                   NULL, NULL);
7987                         if (result == ISC_R_SUCCESS) {
7988                                 rbtdbiter->node = node;
7989                                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7990                         }
7991                 }
7992         }
7993
7994 #if 1
7995         if (result == ISC_R_SUCCESS) {
7996                 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
7997                                                   origin, NULL);
7998                 if (result == ISC_R_SUCCESS) {
7999                         rbtdbiter->new_origin = ISC_TRUE;
8000                         reference_iter_node(rbtdbiter);
8001                 }
8002         } else if (result == DNS_R_PARTIALMATCH) {
8003                 result = ISC_R_NOTFOUND;
8004                 rbtdbiter->node = NULL;
8005         }
8006
8007         rbtdbiter->result = result;
8008 #else
8009         if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
8010                 isc_result_t tresult;
8011                 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
8012                                                    origin, NULL);
8013                 if (tresult == ISC_R_SUCCESS) {
8014                         rbtdbiter->new_origin = ISC_TRUE;
8015                         reference_iter_node(rbtdbiter);
8016                 } else {
8017                         result = tresult;
8018                         rbtdbiter->node = NULL;
8019                 }
8020         } else
8021                 rbtdbiter->node = NULL;
8022
8023         rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
8024                             ISC_R_SUCCESS : result;
8025 #endif
8026
8027         return (result);
8028 }
8029
8030 static isc_result_t
8031 dbiterator_prev(dns_dbiterator_t *iterator) {
8032         isc_result_t result;
8033         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8034         dns_name_t *name, *origin;
8035         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8036
8037         REQUIRE(rbtdbiter->node != NULL);
8038
8039         if (rbtdbiter->result != ISC_R_SUCCESS)
8040                 return (rbtdbiter->result);
8041
8042         if (rbtdbiter->paused)
8043                 resume_iteration(rbtdbiter);
8044
8045         name = dns_fixedname_name(&rbtdbiter->name);
8046         origin = dns_fixedname_name(&rbtdbiter->origin);
8047         result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
8048         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8049             !rbtdbiter->nonsec3 &&
8050             &rbtdbiter->nsec3chain == rbtdbiter->current) {
8051                 rbtdbiter->current = &rbtdbiter->chain;
8052                 dns_rbtnodechain_reset(rbtdbiter->current);
8053                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8054                                                name, origin);
8055                 if (result == ISC_R_NOTFOUND)
8056                         result = ISC_R_NOMORE;
8057         }
8058
8059         dereference_iter_node(rbtdbiter);
8060
8061         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8062                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8063                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8064                                                   NULL, &rbtdbiter->node);
8065         }
8066
8067         if (result == ISC_R_SUCCESS)
8068                 reference_iter_node(rbtdbiter);
8069
8070         rbtdbiter->result = result;
8071
8072         return (result);
8073 }
8074
8075 static isc_result_t
8076 dbiterator_next(dns_dbiterator_t *iterator) {
8077         isc_result_t result;
8078         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8079         dns_name_t *name, *origin;
8080         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8081
8082         REQUIRE(rbtdbiter->node != NULL);
8083
8084         if (rbtdbiter->result != ISC_R_SUCCESS)
8085                 return (rbtdbiter->result);
8086
8087         if (rbtdbiter->paused)
8088                 resume_iteration(rbtdbiter);
8089
8090         name = dns_fixedname_name(&rbtdbiter->name);
8091         origin = dns_fixedname_name(&rbtdbiter->origin);
8092         result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
8093         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8094             !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
8095                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8096                 dns_rbtnodechain_reset(rbtdbiter->current);
8097                 result = dns_rbtnodechain_first(rbtdbiter->current,
8098                                                 rbtdb->nsec3, name, origin);
8099                 if (result == ISC_R_NOTFOUND)
8100                         result = ISC_R_NOMORE;
8101         }
8102
8103         dereference_iter_node(rbtdbiter);
8104
8105         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8106                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8107                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8108                                                   NULL, &rbtdbiter->node);
8109         }
8110         if (result == ISC_R_SUCCESS)
8111                 reference_iter_node(rbtdbiter);
8112
8113         rbtdbiter->result = result;
8114
8115         return (result);
8116 }
8117
8118 static isc_result_t
8119 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
8120                    dns_name_t *name)
8121 {
8122         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8123         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8124         dns_rbtnode_t *node = rbtdbiter->node;
8125         isc_result_t result;
8126         dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8127         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8128
8129         REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8130         REQUIRE(rbtdbiter->node != NULL);
8131
8132         if (rbtdbiter->paused)
8133                 resume_iteration(rbtdbiter);
8134
8135         if (name != NULL) {
8136                 if (rbtdbiter->common.relative_names)
8137                         origin = NULL;
8138                 result = dns_name_concatenate(nodename, origin, name, NULL);
8139                 if (result != ISC_R_SUCCESS)
8140                         return (result);
8141                 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8142                         result = DNS_R_NEWORIGIN;
8143         } else
8144                 result = ISC_R_SUCCESS;
8145
8146         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8147         new_reference(rbtdb, node);
8148         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8149
8150         *nodep = rbtdbiter->node;
8151
8152         if (iterator->cleaning && result == ISC_R_SUCCESS) {
8153                 isc_result_t expire_result;
8154
8155                 /*
8156                  * If the deletion array is full, flush it before trying
8157                  * to expire the current node.  The current node can't
8158                  * fully deleted while the iteration cursor is still on it.
8159                  */
8160                 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8161                         flush_deletions(rbtdbiter);
8162
8163                 expire_result = expirenode(iterator->db, *nodep, 0);
8164
8165                 /*
8166                  * expirenode() currently always returns success.
8167                  */
8168                 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8169                         unsigned int refs;
8170
8171                         rbtdbiter->deletions[rbtdbiter->delete++] = node;
8172                         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8173                         dns_rbtnode_refincrement(node, &refs);
8174                         INSIST(refs != 0);
8175                         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8176                 }
8177         }
8178
8179         return (result);
8180 }
8181
8182 static isc_result_t
8183 dbiterator_pause(dns_dbiterator_t *iterator) {
8184         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8185         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8186
8187         if (rbtdbiter->result != ISC_R_SUCCESS &&
8188             rbtdbiter->result != ISC_R_NOMORE)
8189                 return (rbtdbiter->result);
8190
8191         if (rbtdbiter->paused)
8192                 return (ISC_R_SUCCESS);
8193
8194         rbtdbiter->paused = ISC_TRUE;
8195
8196         if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8197                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8198                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8199                 rbtdbiter->tree_locked = isc_rwlocktype_none;
8200         }
8201
8202         flush_deletions(rbtdbiter);
8203
8204         return (ISC_R_SUCCESS);
8205 }
8206
8207 static isc_result_t
8208 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8209         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8210         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8211
8212         if (rbtdbiter->result != ISC_R_SUCCESS)
8213                 return (rbtdbiter->result);
8214
8215         return (dns_name_copy(origin, name, NULL));
8216 }
8217
8218 /*%
8219  * Additional cache routines.
8220  */
8221 static isc_result_t
8222 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8223                        dns_rdatatype_t qtype, dns_acache_t *acache,
8224                        dns_zone_t **zonep, dns_db_t **dbp,
8225                        dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8226                        dns_name_t *fname, dns_message_t *msg,
8227                        isc_stdtime_t now)
8228 {
8229         dns_rbtdb_t *rbtdb = rdataset->private1;
8230         dns_rbtnode_t *rbtnode = rdataset->private2;
8231         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8232         unsigned int current_count = rdataset->privateuint4;
8233         unsigned int count;
8234         rdatasetheader_t *header;
8235         nodelock_t *nodelock;
8236         unsigned int total_count;
8237         acachectl_t *acarray;
8238         dns_acacheentry_t *entry;
8239         isc_result_t result;
8240
8241         UNUSED(qtype); /* we do not use this value at least for now */
8242         UNUSED(acache);
8243
8244         header = (struct rdatasetheader *)(raw - sizeof(*header));
8245
8246         total_count = raw[0] * 256 + raw[1];
8247         INSIST(total_count > current_count);
8248         count = total_count - current_count - 1;
8249
8250         acarray = NULL;
8251
8252         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8253         NODE_LOCK(nodelock, isc_rwlocktype_read);
8254
8255         switch (type) {
8256         case dns_rdatasetadditional_fromauth:
8257                 acarray = header->additional_auth;
8258                 break;
8259         case dns_rdatasetadditional_fromcache:
8260                 acarray = NULL;
8261                 break;
8262         case dns_rdatasetadditional_fromglue:
8263                 acarray = header->additional_glue;
8264                 break;
8265         default:
8266                 INSIST(0);
8267         }
8268
8269         if (acarray == NULL) {
8270                 if (type != dns_rdatasetadditional_fromcache)
8271                         dns_acache_countquerymiss(acache);
8272                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8273                 return (ISC_R_NOTFOUND);
8274         }
8275
8276         if (acarray[count].entry == NULL) {
8277                 dns_acache_countquerymiss(acache);
8278                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8279                 return (ISC_R_NOTFOUND);
8280         }
8281
8282         entry = NULL;
8283         dns_acache_attachentry(acarray[count].entry, &entry);
8284
8285         NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8286
8287         result = dns_acache_getentry(entry, zonep, dbp, versionp,
8288                                      nodep, fname, msg, now);
8289
8290         dns_acache_detachentry(&entry);
8291
8292         return (result);
8293 }
8294
8295 static void
8296 acache_callback(dns_acacheentry_t *entry, void **arg) {
8297         dns_rbtdb_t *rbtdb;
8298         dns_rbtnode_t *rbtnode;
8299         nodelock_t *nodelock;
8300         acachectl_t *acarray = NULL;
8301         acache_cbarg_t *cbarg;
8302         unsigned int count;
8303
8304         REQUIRE(arg != NULL);
8305         cbarg = *arg;
8306
8307         /*
8308          * The caller must hold the entry lock.
8309          */
8310
8311         rbtdb = (dns_rbtdb_t *)cbarg->db;
8312         rbtnode = (dns_rbtnode_t *)cbarg->node;
8313
8314         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8315         NODE_LOCK(nodelock, isc_rwlocktype_write);
8316
8317         switch (cbarg->type) {
8318         case dns_rdatasetadditional_fromauth:
8319                 acarray = cbarg->header->additional_auth;
8320                 break;
8321         case dns_rdatasetadditional_fromglue:
8322                 acarray = cbarg->header->additional_glue;
8323                 break;
8324         default:
8325                 INSIST(0);
8326         }
8327
8328         count = cbarg->count;
8329         if (acarray != NULL && acarray[count].entry == entry) {
8330                 acarray[count].entry = NULL;
8331                 INSIST(acarray[count].cbarg == cbarg);
8332                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8333                 acarray[count].cbarg = NULL;
8334         } else
8335                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8336
8337         dns_acache_detachentry(&entry);
8338
8339         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8340
8341         dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8342         dns_db_detach((dns_db_t **)(void*)&rbtdb);
8343
8344         *arg = NULL;
8345 }
8346
8347 static void
8348 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
8349                       acache_cbarg_t **cbargp)
8350 {
8351         acache_cbarg_t *cbarg;
8352
8353         REQUIRE(mctx != NULL);
8354         REQUIRE(entry != NULL);
8355         REQUIRE(cbargp != NULL && *cbargp != NULL);
8356
8357         cbarg = *cbargp;
8358
8359         dns_acache_cancelentry(entry);
8360         dns_db_detachnode(cbarg->db, &cbarg->node);
8361         dns_db_detach(&cbarg->db);
8362
8363         isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
8364
8365         *cbargp = NULL;
8366 }
8367
8368 static isc_result_t
8369 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8370                        dns_rdatatype_t qtype, dns_acache_t *acache,
8371                        dns_zone_t *zone, dns_db_t *db,
8372                        dns_dbversion_t *version, dns_dbnode_t *node,
8373                        dns_name_t *fname)
8374 {
8375         dns_rbtdb_t *rbtdb = rdataset->private1;
8376         dns_rbtnode_t *rbtnode = rdataset->private2;
8377         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8378         unsigned int current_count = rdataset->privateuint4;
8379         rdatasetheader_t *header;
8380         unsigned int total_count, count;
8381         nodelock_t *nodelock;
8382         isc_result_t result;
8383         acachectl_t *acarray;
8384         dns_acacheentry_t *newentry, *oldentry = NULL;
8385         acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8386
8387         UNUSED(qtype);
8388
8389         if (type == dns_rdatasetadditional_fromcache)
8390                 return (ISC_R_SUCCESS);
8391
8392         header = (struct rdatasetheader *)(raw - sizeof(*header));
8393
8394         total_count = raw[0] * 256 + raw[1];
8395         INSIST(total_count > current_count);
8396         count = total_count - current_count - 1; /* should be private data */
8397
8398         newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8399         if (newcbarg == NULL)
8400                 return (ISC_R_NOMEMORY);
8401         newcbarg->type = type;
8402         newcbarg->count = count;
8403         newcbarg->header = header;
8404         newcbarg->db = NULL;
8405         dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8406         newcbarg->node = NULL;
8407         dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8408                           &newcbarg->node);
8409         newentry = NULL;
8410         result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8411                                         acache_callback, newcbarg, &newentry);
8412         if (result != ISC_R_SUCCESS)
8413                 goto fail;
8414         /* Set cache data in the new entry. */
8415         result = dns_acache_setentry(acache, newentry, zone, db,
8416                                      version, node, fname);
8417         if (result != ISC_R_SUCCESS)
8418                 goto fail;
8419
8420         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8421         NODE_LOCK(nodelock, isc_rwlocktype_write);
8422
8423         acarray = NULL;
8424         switch (type) {
8425         case dns_rdatasetadditional_fromauth:
8426                 acarray = header->additional_auth;
8427                 break;
8428         case dns_rdatasetadditional_fromglue:
8429                 acarray = header->additional_glue;
8430                 break;
8431         default:
8432                 INSIST(0);
8433         }
8434
8435         if (acarray == NULL) {
8436                 unsigned int i;
8437
8438                 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
8439                                       sizeof(acachectl_t));
8440
8441                 if (acarray == NULL) {
8442                         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8443                         goto fail;
8444                 }
8445
8446                 for (i = 0; i < total_count; i++) {
8447                         acarray[i].entry = NULL;
8448                         acarray[i].cbarg = NULL;
8449                 }
8450         }
8451         switch (type) {
8452         case dns_rdatasetadditional_fromauth:
8453                 header->additional_auth = acarray;
8454                 break;
8455         case dns_rdatasetadditional_fromglue:
8456                 header->additional_glue = acarray;
8457                 break;
8458         default:
8459                 INSIST(0);
8460         }
8461
8462         if (acarray[count].entry != NULL) {
8463                 /*
8464                  * Swap the entry.  Delay cleaning-up the old entry since
8465                  * it would require a node lock.
8466                  */
8467                 oldentry = acarray[count].entry;
8468                 INSIST(acarray[count].cbarg != NULL);
8469                 oldcbarg = acarray[count].cbarg;
8470         }
8471         acarray[count].entry = newentry;
8472         acarray[count].cbarg = newcbarg;
8473
8474         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8475
8476         if (oldentry != NULL) {
8477                 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
8478                 dns_acache_detachentry(&oldentry);
8479         }
8480
8481         return (ISC_R_SUCCESS);
8482
8483  fail:
8484         if (newcbarg != NULL) {
8485                 if (newentry != NULL) {
8486                         acache_cancelentry(rbtdb->common.mctx, newentry,
8487                                            &newcbarg);
8488                         dns_acache_detachentry(&newentry);
8489                 } else {
8490                         dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
8491                         dns_db_detach(&newcbarg->db);
8492                         isc_mem_put(rbtdb->common.mctx, newcbarg,
8493                             sizeof(*newcbarg));
8494                 }
8495         }
8496
8497         return (result);
8498 }
8499
8500 static isc_result_t
8501 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
8502                        dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
8503 {
8504         dns_rbtdb_t *rbtdb = rdataset->private1;
8505         dns_rbtnode_t *rbtnode = rdataset->private2;
8506         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8507         unsigned int current_count = rdataset->privateuint4;
8508         rdatasetheader_t *header;
8509         nodelock_t *nodelock;
8510         unsigned int total_count, count;
8511         acachectl_t *acarray;
8512         dns_acacheentry_t *entry;
8513         acache_cbarg_t *cbarg;
8514
8515         UNUSED(qtype);          /* we do not use this value at least for now */
8516         UNUSED(acache);
8517
8518         if (type == dns_rdatasetadditional_fromcache)
8519                 return (ISC_R_SUCCESS);
8520
8521         header = (struct rdatasetheader *)(raw - sizeof(*header));
8522
8523         total_count = raw[0] * 256 + raw[1];
8524         INSIST(total_count > current_count);
8525         count = total_count - current_count - 1;
8526
8527         acarray = NULL;
8528         entry = NULL;
8529
8530         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8531         NODE_LOCK(nodelock, isc_rwlocktype_write);
8532
8533         switch (type) {
8534         case dns_rdatasetadditional_fromauth:
8535                 acarray = header->additional_auth;
8536                 break;
8537         case dns_rdatasetadditional_fromglue:
8538                 acarray = header->additional_glue;
8539                 break;
8540         default:
8541                 INSIST(0);
8542         }
8543
8544         if (acarray == NULL) {
8545                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8546                 return (ISC_R_NOTFOUND);
8547         }
8548
8549         entry = acarray[count].entry;
8550         if (entry == NULL) {
8551                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8552                 return (ISC_R_NOTFOUND);
8553         }
8554
8555         acarray[count].entry = NULL;
8556         cbarg = acarray[count].cbarg;
8557         acarray[count].cbarg = NULL;
8558
8559         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8560
8561         if (entry != NULL) {
8562                 if (cbarg != NULL)
8563                         acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
8564                 dns_acache_detachentry(&entry);
8565         }
8566
8567         return (ISC_R_SUCCESS);
8568 }
8569
8570 /*%
8571  * Routines for LRU-based cache management.
8572  */
8573
8574 /*%
8575  * See if a given cache entry that is being reused needs to be updated
8576  * in the LRU-list.  From the LRU management point of view, this function is
8577  * expected to return true for almost all cases.  When used with threads,
8578  * however, this may cause a non-negligible performance penalty because a
8579  * writer lock will have to be acquired before updating the list.
8580  * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
8581  * function returns true if the entry has not been updated for some period of
8582  * time.  We differentiate the NS or glue address case and the others since
8583  * experiments have shown that the former tends to be accessed relatively
8584  * infrequently and the cost of cache miss is higher (e.g., a missing NS records
8585  * may cause external queries at a higher level zone, involving more
8586  * transactions).
8587  *
8588  * Caller must hold the node (read or write) lock.
8589  */
8590 static inline isc_boolean_t
8591 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
8592         if ((header->attributes &
8593              (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
8594                 return (ISC_FALSE);
8595
8596 #if DNS_RBTDB_LIMITLRUUPDATE
8597         if (header->type == dns_rdatatype_ns ||
8598             (header->trust == dns_trust_glue &&
8599              (header->type == dns_rdatatype_a ||
8600               header->type == dns_rdatatype_aaaa))) {
8601                 /*
8602                  * Glue records are updated if at least 60 seconds have passed
8603                  * since the previous update time.
8604                  */
8605                 return (header->last_used + 60 <= now);
8606         }
8607
8608         /* Other records are updated if 5 minutes have passed. */
8609         return (header->last_used + 300 <= now);
8610 #else
8611         UNUSED(now);
8612
8613         return (ISC_TRUE);
8614 #endif
8615 }
8616
8617 /*%
8618  * Update the timestamp of a given cache entry and move it to the head
8619  * of the corresponding LRU list.
8620  *
8621  * Caller must hold the node (write) lock.
8622  *
8623  * Note that the we do NOT touch the heap here, as the TTL has not changed.
8624  */
8625 static void
8626 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8627               isc_stdtime_t now)
8628 {
8629         INSIST(IS_CACHE(rbtdb));
8630
8631         /* To be checked: can we really assume this? XXXMLG */
8632         INSIST(ISC_LINK_LINKED(header, link));
8633
8634         ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
8635         header->last_used = now;
8636         ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
8637 }
8638
8639 /*%
8640  * Purge some expired and/or stale (i.e. unused for some period) cache entries
8641  * under an overmem condition.  To recover from this condition quickly, up to
8642  * 2 entries will be purged.  This process is triggered while adding a new
8643  * entry, and we specifically avoid purging entries in the same LRU bucket as
8644  * the one to which the new entry will belong.  Otherwise, we might purge
8645  * entries of the same name of different RR types while adding RRsets from a
8646  * single response (consider the case where we're adding A and AAAA glue records
8647  * of the same NS name).
      *
      * 'locknum_start' is the bucket that is skipped.  The scan begins at the
      * following bucket and advances modulo rbtdb->node_lock_count; it ends when
      * it comes back around to 'locknum_start' or when the purge budget
      * ('purgecount', initially 2 and shared by all buckets) reaches zero.
      *
      * For each bucket visited, the bucket's node lock is taken in write mode.
      * The element at index 1 of the bucket's heap is expired if its TTL is not
      * later than 'now - RBTDB_VIRTUAL'.  After that, entries are taken from the
      * tail of the bucket's LRU list while budget remains; note that this walk
      * does not examine 'last_used', it simply takes whatever is at the tail.
      *
      * The budget is decremented for every entry handed to expire_header(),
      * whether or not expire_header() was able to clean up the node at once.
      *
      * 'tree_locked' is passed through unchanged to expire_header().
8648  */
8649 static void
8650 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
8651               isc_stdtime_t now, isc_boolean_t tree_locked)
8652 {
8653         rdatasetheader_t *header, *header_prev;
8654         unsigned int locknum;
8655         int purgecount = 2;
8656
8657         for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
8658              locknum != locknum_start && purgecount > 0;
8659              locknum = (locknum + 1) % rbtdb->node_lock_count) {
8660                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
8661                           isc_rwlocktype_write);
8662
                     /* First candidate: the heap's element at index 1, if expired. */
8663                 header = isc_heap_element(rbtdb->heaps[locknum], 1);
8664                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
8665                         expire_header(rbtdb, header, tree_locked);
8666                         purgecount--;
8667                 }
8668
                     /* Then spend any remaining budget on the tail of the LRU list. */
8669                 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
8670                      header != NULL && purgecount > 0;
8671                      header = header_prev) {
                             /* Fetch the predecessor before 'header' is unlinked. */
8672                         header_prev = ISC_LIST_PREV(header, link);
8673                         /*
8674                          * Unlink the entry at this point to avoid checking it
8675                          * again even if it's currently used by someone else and
8676                          * cannot be purged at this moment.  This entry won't be
8677                          * referenced any more (so unlinking is safe) since
8678                          * expire_header() below resets its TTL to 0.
8679                          */
8680                         ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
8681                                         link);
8682                         expire_header(rbtdb, header, tree_locked);
8683                         purgecount--;
8684                 }
8685
8686                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
8687                                     isc_rwlocktype_write);
8688         }
8689 }
8690
     /*%
      * Force a cache entry to expire: its TTL is set to 0 through set_ttl(),
      * RDATASET_ATTR_STALE is added to its attributes, and the owning node is
      * marked dirty.  If the node's current reference count is 0, a reference
      * is taken and immediately released through decrement_reference() so that
      * the node can be cleaned up right away; otherwise nothing further is done
      * here.
      *
      * 'tree_locked' selects one of the lock-type arguments handed to
      * decrement_reference(): isc_rwlocktype_write when true,
      * isc_rwlocktype_none otherwise.  NOTE(review): presumably this reflects
      * whether the caller already holds the tree write lock -- confirm against
      * decrement_reference().
      *
      * Caller must hold the node (write) lock.
      */
8691 static void
8692 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8693               isc_boolean_t tree_locked)
8694 {
8695         set_ttl(rbtdb, header, 0);
8696         header->attributes |= RDATASET_ATTR_STALE;
8697         header->node->dirty = 1;
8698
8699         /*
8700          * Caller must hold the node (write) lock.
8701          */
8702
8703         if (dns_rbtnode_refcurrent(header->node) == 0) {
8704                 /*
8705                  * If no one else is using the node, we can clean it up now.
8706                  * We first need to gain a new reference to the node to meet a
8707                  * requirement of decrement_reference().
8708                  */
8709                 new_reference(rbtdb, header->node);
8710                 decrement_reference(rbtdb, header->node, 0,
8711                                     isc_rwlocktype_write,
8712                                     tree_locked ? isc_rwlocktype_write :
8713                                     isc_rwlocktype_none, ISC_FALSE);
8714         }
8715 }