2 * validator/val_neg.c - validator aggressive negative caching functions.
4 * Copyright (c) 2008, NLnet Labs. All rights reserved.
6 * This software is open source.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 * This file contains helper functions for the validator module.
40 * The functions help with aggressive negative caching.
41 * This creates new denials of existence, and proofs for absence of types
42 * from cached NSEC records.
45 #ifdef HAVE_OPENSSL_SSL_H
46 #include "openssl/ssl.h"
47 #define NSEC3_SHA_LEN SHA_DIGEST_LENGTH
49 #define NSEC3_SHA_LEN 20
51 #include "validator/val_neg.h"
52 #include "validator/val_nsec.h"
53 #include "validator/val_nsec3.h"
54 #include "validator/val_utils.h"
55 #include "util/data/dname.h"
56 #include "util/data/msgreply.h"
58 #include "util/net_help.h"
59 #include "util/config_file.h"
60 #include "services/cache/rrset.h"
61 #include "services/cache/dns.h"
62 #include "sldns/rrdef.h"
63 #include "sldns/sbuffer.h"
/* Ordering function installed on each zone's data tree (see the rbtree_init
 * call in neg_setup_zone_node). Both keys are treated as struct val_neg_data
 * and compared by name and label count through dname_canon_lab_cmp(). */
65 int val_neg_data_compare(const void* a, const void* b)
67 struct val_neg_data* x = (struct val_neg_data*)a;
68 struct val_neg_data* y = (struct val_neg_data*)b;
70 return dname_canon_lab_cmp(x->name, x->labs, y->name, y->labs, &m);
/* Ordering function installed on the cache zone tree (see val_neg_create).
 * Keys are struct val_neg_zone. The class is compared first; when the
 * classes are equal the names are compared with dname_canon_lab_cmp(). */
73 int val_neg_zone_compare(const void* a, const void* b)
75 struct val_neg_zone* x = (struct val_neg_zone*)a;
76 struct val_neg_zone* y = (struct val_neg_zone*)b;
78 if(x->dclass != y->dclass) {
79 if(x->dclass < y->dclass)
83 return dname_canon_lab_cmp(x->name, x->labs, y->name, y->labs, &m);
/* Allocate and initialise a negative cache.
 * cfg: optional configuration; when non-NULL its neg_cache_size replaces
 *      the built-in default of 1024*1024 for neg->max.
 * maxiter: stored as neg->nsec3_max_iter.
 * An allocation failure is reported with log_err(). The zone tree is
 * initialised with val_neg_zone_compare and the cache lock is created and
 * registered with lock_protect() over the whole structure. */
86 struct val_neg_cache* val_neg_create(struct config_file* cfg, size_t maxiter)
88 struct val_neg_cache* neg = (struct val_neg_cache*)calloc(1,
91 log_err("Could not create neg cache: out of memory");
94 neg->nsec3_max_iter = maxiter;
95 neg->max = 1024*1024; /* 1 M is thousands of entries */
96 if(cfg) neg->max = cfg->neg_cache_size;
97 rbtree_init(&neg->tree, &val_neg_zone_compare);
98 lock_basic_init(&neg->lock);
99 lock_protect(&neg->lock, neg, sizeof(*neg));
/* Report the memory use of the cache: sizeof(*neg) plus the neg->use
 * counter. The counter is read while neg->lock is held. */
103 size_t val_neg_get_mem(struct val_neg_cache* neg)
106 lock_basic_lock(&neg->lock);
107 result = sizeof(*neg) + neg->use;
108 lock_basic_unlock(&neg->lock);
/* Callback handed to traverse_postorder() by neg_clear_zones. The node
 * pointer n is cast to struct val_neg_data; the second argument is unused. */
112 /** clear datas on cache deletion */
114 neg_clear_datas(rbnode_type* n, void* ATTR_UNUSED(arg))
116 struct val_neg_data* d = (struct val_neg_data*)n;
/* Callback handed to traverse_postorder() by neg_cache_delete. The node
 * pointer n is cast to struct val_neg_zone and the data tree of that zone
 * is walked in postorder with neg_clear_datas. The second argument is
 * unused. */
121 /** clear zones on cache deletion */
123 neg_clear_zones(rbnode_type* n, void* ATTR_UNUSED(arg))
125 struct val_neg_zone* z = (struct val_neg_zone*)n;
126 /* delete all the rrset entries in the tree */
127 traverse_postorder(&z->tree, &neg_clear_datas, NULL);
/* Tear down the negative cache: the lock is destroyed and every zone in
 * neg->tree is visited in postorder with neg_clear_zones. */
133 void neg_cache_delete(struct val_neg_cache* neg)
136 lock_basic_destroy(&neg->lock);
137 /* delete all the zones in the tree */
138 traverse_postorder(&neg->tree, &neg_clear_zones, NULL);
/* LRU helper: data is linked in ahead of the current neg->first. Its next
 * pointer takes the old first element, and in the else branch the old first
 * element gets data as its prev. Callers in this file invoke it with the
 * cache lock held (see val_neg_addreply). */
143 * Put data element at the front of the LRU list.
144 * @param neg: negative cache with LRU start and end.
145 * @param data: this data is fronted.
147 static void neg_lru_front(struct val_neg_cache* neg,
148 struct val_neg_data* data)
151 data->next = neg->first;
154 else neg->first->prev = data;
/* LRU helper: unlink data from the doubly linked list. The neighbours are
 * joined to each other; when data was at an end of the list, neg->first or
 * neg->last is moved to the remaining neighbour instead. */
159 * Remove data element from LRU list.
160 * @param neg: negative cache with LRU start and end.
161 * @param data: this data is removed from the list.
163 static void neg_lru_remove(struct val_neg_cache* neg,
164 struct val_neg_data* data)
167 data->prev->next = data->next;
168 else neg->first = data->next;
170 data->next->prev = data->prev;
171 else neg->last = data->prev;
/* LRU helper: mark data as most recently used. Nothing happens when data is
 * already neg->first; otherwise it is unlinked with neg_lru_remove and put
 * back with neg_lru_front. */
175 * Touch LRU for data element, put it at the start of the LRU list.
176 * @param neg: negative cache with LRU start and end.
177 * @param data: this data is used.
179 static void neg_lru_touch(struct val_neg_cache* neg,
180 struct val_neg_data* data)
182 if(data == neg->first)
183 return; /* nothing to do */
184 /* remove from current lru position */
185 neg_lru_remove(neg, data);
187 neg_lru_front(neg, data);
/* Precondition (asserted): z is in use and has a positive count.
 * Zones whose count has reached zero are removed from neg->tree with
 * rbtree_delete, and neg->use is reduced by the name length plus the size
 * of the zone structure for each removed zone. */
191 * Delete a zone element from the negative cache.
192 * May delete other zone elements to keep tree coherent, or
193 * only mark the element as 'not in use'.
194 * @param neg: negative cache.
195 * @param z: zone element to delete.
197 static void neg_delete_zone(struct val_neg_cache* neg, struct val_neg_zone* z)
199 struct val_neg_zone* p, *np;
201 log_assert(z->in_use);
202 log_assert(z->count > 0);
205 /* go up the tree and reduce counts */
208 log_assert(p->count > 0);
213 /* remove zones with zero count */
215 while(p && p->count == 0) {
217 (void)rbtree_delete(&neg->tree, &p->node);
218 neg->use -= p->len + sizeof(*p);
/* Remove a data element from the negative cache.
 * Precondition (asserted): el is in use and has a positive count.
 * el is taken off the LRU list first. Elements whose count has dropped to
 * zero are removed from the zone data tree and neg->use is reduced by their
 * name length plus structure size. When the zone data tree ends up empty,
 * the zone itself is handed to neg_delete_zone. */
226 void neg_delete_data(struct val_neg_cache* neg, struct val_neg_data* el)
228 struct val_neg_zone* z;
229 struct val_neg_data* p, *np;
232 log_assert(el->in_use);
233 log_assert(el->count > 0);
236 /* remove it from the lru list */
237 neg_lru_remove(neg, el);
238 log_assert(neg->first != el && neg->last != el);
240 /* go up the tree and reduce counts */
243 log_assert(p->count > 0);
248 /* delete 0 count items from tree */
250 while(p && p->count == 0) {
252 (void)rbtree_delete(&z->tree, &p->node);
253 neg->use -= p->len + sizeof(*p);
259 /* check if the zone is now unused */
260 if(z->tree.count == 0) {
261 neg_delete_zone(neg, z);
/* Eviction loop: while the LRU list is not empty and neg->use + need exceeds
 * neg->max, the element at neg->last (the LRU tail) is deleted with
 * neg_delete_data. */
266 * Create more space in negative cache
267 * The oldest elements are deleted until enough space is present.
268 * Empty zones are deleted.
269 * @param neg: negative cache.
270 * @param need: how many bytes are needed.
272 static void neg_make_space(struct val_neg_cache* neg, size_t need)
274 /* delete elements until there is enough space or the list is empty */
275 while(neg->last && neg->max < neg->use + need) {
276 neg_delete_data(neg, neg->last);
/* Exact-match lookup of a zone in neg->tree.
 * A temporary struct val_neg_zone on the stack serves as the search key;
 * its label count is derived with dname_count_labels() and its class is
 * taken from dclass. The result of rbtree_search() is cast back to a zone
 * pointer. */
280 struct val_neg_zone* neg_find_zone(struct val_neg_cache* neg,
281 uint8_t* nm, size_t len, uint16_t dclass)
283 struct val_neg_zone lookfor;
284 struct val_neg_zone* result;
285 lookfor.node.key = &lookfor;
288 lookfor.labs = dname_count_labels(lookfor.name);
289 lookfor.dclass = dclass;
291 result = (struct val_neg_zone*)
292 rbtree_search(&neg->tree, lookfor.node.key);
/* Exact-match lookup in the data tree of one zone. A temporary
 * struct val_neg_data on the stack is used as the key for rbtree_search()
 * on zone->tree. */
297 * Find the given data
298 * @param zone: negative zone
299 * @param nm: what to look for.
300 * @param len: length of nm
301 * @param labs: labels in nm
302 * @return data or NULL if not found.
304 static struct val_neg_data* neg_find_data(struct val_neg_zone* zone,
305 uint8_t* nm, size_t len, int labs)
307 struct val_neg_data lookfor;
308 struct val_neg_data* result;
309 lookfor.node.key = &lookfor;
314 result = (struct val_neg_data*)
315 rbtree_search(&zone->tree, lookfor.node.key);
/* Size estimate used before inserting a reply: for an NSEC rrset found at
 * index an_numrrsets up to an_numrrsets+ns_numrrsets, one struct
 * val_neg_data plus the name length is counted for the owner name and for
 * every ancestor name obtained by stripping labels until the root.
 * Only type NSEC is counted here; NSEC3 rrsets are not.
 * NOTE(review): the statement with embedded number 333 assigns res with '='
 * inside the per-rrset loop, so with several NSEC rrsets only the last one
 * appears to contribute; '+=' looks intended. Confirm against the complete
 * file before changing. */
320 * Calculate space needed for the data and all its parents
321 * @param rep: NSEC entries.
324 static size_t calc_data_need(struct reply_info* rep)
327 size_t i, len, res = 0;
329 for(i=rep->an_numrrsets; i<rep->an_numrrsets+rep->ns_numrrsets; i++) {
330 if(ntohs(rep->rrsets[i]->rk.type) == LDNS_RR_TYPE_NSEC) {
331 d = rep->rrsets[i]->rk.dname;
332 len = rep->rrsets[i]->rk.dname_len;
333 res = sizeof(struct val_neg_data) + len;
334 while(!dname_is_root(d)) {
335 log_assert(len > 1); /* not root label */
336 dname_remove_label(&d, &len);
337 res += sizeof(struct val_neg_data) + len;
/* Size estimate for a zone entry: one struct val_neg_zone plus the name
 * length for the zone name itself and for every ancestor name produced by
 * stripping one label at a time until the root is reached. */
345 * Calculate space needed for zone and all its parents
346 * @param d: name of zone
347 * @param len: length of name
350 static size_t calc_zone_need(uint8_t* d, size_t len)
352 size_t res = sizeof(struct val_neg_zone) + len;
353 while(!dname_is_root(d)) {
354 log_assert(len > 1); /* not root label */
355 dname_remove_label(&d, &len);
356 res += sizeof(struct val_neg_zone) + len;
/* Implementation outline: rbtree_find_less_equal() on neg->tree yields
 * either an exact match or the next smaller zone. A missing result or a
 * result of a different class is tested for explicitly. dname_lab_cmp() is
 * then used only to obtain the number of matching labels (m), and the
 * parent pointers are followed until a zone with labs <= m is found. */
362 * Find closest existing parent zone of the given name.
363 * @param neg: negative cache.
364 * @param nm: name to look for
365 * @param nm_len: length of nm
366 * @param labs: labelcount of nm.
367 * @param qclass: class.
368 * @return the zone or NULL if none found.
370 static struct val_neg_zone* neg_closest_zone_parent(struct val_neg_cache* neg,
371 uint8_t* nm, size_t nm_len, int labs, uint16_t qclass)
373 struct val_neg_zone key;
374 struct val_neg_zone* result;
375 rbnode_type* res = NULL;
381 if(rbtree_find_less_equal(&neg->tree, &key, &res)) {
383 result = (struct val_neg_zone*)res;
385 /* smaller element (or no element) */
387 result = (struct val_neg_zone*)res;
388 if(!result || result->dclass != qclass)
390 /* count number of labels matched */
391 (void)dname_lab_cmp(result->name, result->labs, key.name,
393 while(result) { /* go up until qname is subdomain of stub */
394 if(result->labs <= m)
396 result = result->parent;
/* Same scheme as neg_closest_zone_parent, applied to the data tree of one
 * zone: rbtree_find_less_equal() gives an exact or next smaller element,
 * dname_lab_cmp() supplies the number of matching labels (m), and parent
 * pointers are followed until an element with labs <= m is found. */
403 * Find closest existing parent data for the given name.
404 * @param zone: to look in.
405 * @param nm: name to look for
406 * @param nm_len: length of nm
407 * @param labs: labelcount of nm.
408 * @return the data or NULL if none found.
410 static struct val_neg_data* neg_closest_data_parent(
411 struct val_neg_zone* zone, uint8_t* nm, size_t nm_len, int labs)
413 struct val_neg_data key;
414 struct val_neg_data* result;
415 rbnode_type* res = NULL;
420 if(rbtree_find_less_equal(&zone->tree, &key, &res)) {
422 result = (struct val_neg_data*)res;
424 /* smaller element (or no element) */
426 result = (struct val_neg_data*)res;
429 /* count number of labels matched */
430 (void)dname_lab_cmp(result->name, result->labs, key.name,
432 while(result) { /* go up until qname is subdomain of stub */
433 if(result->labs <= m)
435 result = result->parent;
/* The node is zero-allocated with calloc, its tree key points at the node
 * itself, the name is duplicated with memdup, and the per-zone data tree is
 * initialised with val_neg_data_compare as ordering function. */
442 * Create a single zone node
443 * @param nm: name for zone (copied)
444 * @param nm_len: length of name
445 * @param labs: labels in name.
446 * @param dclass: class of zone, host order.
447 * @return new zone or NULL on failure
449 static struct val_neg_zone* neg_setup_zone_node(
450 uint8_t* nm, size_t nm_len, int labs, uint16_t dclass)
452 struct val_neg_zone* zone =
453 (struct val_neg_zone*)calloc(1, sizeof(*zone));
457 zone->node.key = zone;
458 zone->name = memdup(nm, nm_len);
465 zone->dclass = dclass;
467 rbtree_init(&zone->tree, &val_neg_data_compare);
/* One node is created per label count, from labs down to (not including)
 * the label count of parent, or down to 0 when parent is NULL. After each
 * node the leading label is removed from nm with dname_remove_label().
 * If creating a node fails, the nodes allocated earlier in this call have
 * to be released (see the in-code remark at the failure branch). */
472 * Create a linked list of parent zones, starting at longname ending on
473 * the parent (can be NULL, creates to the root).
474 * @param nm: name for lowest in chain
475 * @param nm_len: length of name
476 * @param labs: labels in name.
477 * @param dclass: class of zone.
478 * @param parent: NULL for to root, else so it fits under here.
479 * @return zone; a chain of zones and their parents up to the parent.
480 * or NULL on malloc failure
482 static struct val_neg_zone* neg_zone_chain(
483 uint8_t* nm, size_t nm_len, int labs, uint16_t dclass,
484 struct val_neg_zone* parent)
487 int tolabs = parent?parent->labs:0;
488 struct val_neg_zone* zone, *prev = NULL, *first = NULL;
490 /* create the new subtree, i is labelcount of current creation */
491 /* this creates a 'first' to z->parent=NULL list of zones */
492 for(i=labs; i!=tolabs; i--) {
493 /* create new item */
494 zone = neg_setup_zone_node(nm, nm_len, i, dclass);
496 /* need to delete other allocations in this routine!*/
497 struct val_neg_zone* p=first, *np;
511 /* prepare for next name */
513 dname_remove_label(&nm, &nm_len);
/* Mark a zone as taken into use. The loop visits the zone itself and then
 * each parent in turn; per the in-code remark it raises their usage
 * counts (the increment statement is not part of this listing). */
518 void val_neg_zone_take_inuse(struct val_neg_zone* zone)
521 struct val_neg_zone* p;
523 /* increase usage count of all parents */
524 for(p=zone; p; p = p->parent) {
/* Create a zone entry for nm/dclass in the negative cache.
 * The closest existing enclosing zone is looked up first; if it carries
 * exactly the requested name it is returned unchanged. Otherwise
 * neg_zone_chain() builds the missing zones below that parent, and each new
 * zone is inserted into neg->tree while neg->use grows by
 * sizeof(struct val_neg_zone) plus the name length of the zone. */
530 struct val_neg_zone* neg_create_zone(struct val_neg_cache* neg,
531 uint8_t* nm, size_t nm_len, uint16_t dclass)
533 struct val_neg_zone* zone;
534 struct val_neg_zone* parent;
535 struct val_neg_zone* p, *np;
536 int labs = dname_count_labels(nm);
538 /* find closest enclosing parent zone that (still) exists */
539 parent = neg_closest_zone_parent(neg, nm, nm_len, labs, dclass);
540 if(parent && query_dname_compare(parent->name, nm) == 0)
541 return parent; /* already exists, weird */
542 /* if parent exists, it is in use */
543 log_assert(!parent || parent->count > 0);
544 zone = neg_zone_chain(nm, nm_len, labs, dclass, parent);
549 /* insert the list of zones into the tree */
554 neg->use += sizeof(struct val_neg_zone) + p->len;
556 (void)rbtree_insert(&neg->tree, &p->node);
557 /* last one needs proper parent pointer */
/* Scans the rrsets at index an_numrrsets up to an_numrrsets+ns_numrrsets
 * and returns the first one whose type is SOA. */
565 /** find zone name of message, returns the SOA record */
566 static struct ub_packed_rrset_key* reply_find_soa(struct reply_info* rep)
569 for(i=rep->an_numrrsets; i< rep->an_numrrsets+rep->ns_numrrsets; i++){
570 if(ntohs(rep->rrsets[i]->rk.type) == LDNS_RR_TYPE_SOA)
571 return rep->rrsets[i];
/* The reply as a whole must have security status secure. The rrsets at
 * index an_numrrsets up to an_numrrsets+ns_numrrsets are then scanned for
 * an NSEC rrset whose own data is also marked secure. Only type NSEC is
 * examined here. */
576 /** see if the reply has NSEC records worthy of caching */
577 static int reply_has_nsec(struct reply_info* rep)
580 struct packed_rrset_data* d;
581 if(rep->security != sec_status_secure)
583 for(i=rep->an_numrrsets; i< rep->an_numrrsets+rep->ns_numrrsets; i++){
584 if(ntohs(rep->rrsets[i]->rk.type) == LDNS_RR_TYPE_NSEC) {
585 d = (struct packed_rrset_data*)rep->rrsets[i]->
587 if(d->security == sec_status_secure)
/* The element is zero-allocated with calloc and receives its own copy of
 * the name through memdup. */
596 * Create single node of data element.
597 * @param nm: name (copied)
598 * @param nm_len: length of name
599 * @param labs: labels in name.
600 * @return element with name nm, or NULL malloc failure.
602 static struct val_neg_data* neg_setup_data_node(
603 uint8_t* nm, size_t nm_len, int labs)
605 struct val_neg_data* el;
606 el = (struct val_neg_data*)calloc(1, sizeof(*el));
611 el->name = memdup(nm, nm_len);
/* Data-element counterpart of neg_zone_chain: one element is created per
 * label count, from labs down to (not including) the label count of parent,
 * or down to 0 when parent is NULL. After each element the leading label is
 * removed from nm with dname_remove_label(). If creating an element fails,
 * the elements allocated earlier in this call have to be released (see the
 * in-code remark at the failure branch). */
622 * Create chain of data element and parents
624 * @param nm_len: length of name
625 * @param labs: labels in name.
626 * @param parent: up to where to make, if NULL up to root label.
627 * @return lowest element with name nm, or NULL malloc failure.
629 static struct val_neg_data* neg_data_chain(
630 uint8_t* nm, size_t nm_len, int labs, struct val_neg_data* parent)
633 int tolabs = parent?parent->labs:0;
634 struct val_neg_data* el, *first = NULL, *prev = NULL;
636 /* create the new subtree, i is labelcount of current creation */
637 /* this creates a 'first' to parent=NULL list of data elements */
638 for(i=labs; i!=tolabs; i--) {
639 /* create new item */
640 el = neg_setup_data_node(nm, nm_len, i);
642 /* need to delete other allocations in this routine!*/
643 struct val_neg_data* p = first, *np;
658 /* prepare for next name */
660 dname_remove_label(&nm, &nm_len);
/* Outline of what the visible statements do:
 * - The rrset must have data with at least one RR of 3 or more bytes.
 * - For type NSEC the end point is the name found at offset 2 of the first
 *   RR; for other rrsets the end point is obtained with
 *   nsec3_get_nextowner_b32() into a local buffer.
 * - Both the owner (el->name) and the end point must lie at or below
 *   zone->name.
 * - An end point equal to the zone name is treated as the last NSEC of the
 *   zone (wipe until the end of the zone).
 * - The tree is walked with rbtree_next() starting after el. Elements that
 *   do not sort after el are skipped, the walk stops once an element sorts
 *   at or after the end point, and everything in between is removed with
 *   neg_delete_data(). The successor is fetched before each deletion
 *   because the deletion may rebalance the tree. */
666 * Remove NSEC records between start and end points.
667 * By walking the tree, the tree is sorted canonically.
668 * @param neg: negative cache.
669 * @param zone: the zone
670 * @param el: element to start walking at.
671 * @param nsec: the nsec record with the end point
673 static void wipeout(struct val_neg_cache* neg, struct val_neg_zone* zone,
674 struct val_neg_data* el, struct ub_packed_rrset_key* nsec)
676 struct packed_rrset_data* d = (struct packed_rrset_data*)nsec->
681 rbnode_type* walk, *next;
682 struct val_neg_data* cur;
685 if(!d || d->count == 0 || d->rr_len[0] < 2+1)
687 if(ntohs(nsec->rk.type) == LDNS_RR_TYPE_NSEC) {
688 end = d->rr_data[0]+2;
689 end_len = dname_valid(end, d->rr_len[0]-2);
690 end_labs = dname_count_labels(end);
693 if(!nsec3_get_nextowner_b32(nsec, 0, buf, sizeof(buf)))
696 end_labs = dname_count_size_labels(end, &end_len);
699 /* sanity check, both owner and end must be below the zone apex */
700 if(!dname_subdomain_c(el->name, zone->name) ||
701 !dname_subdomain_c(end, zone->name))
704 /* detect end of zone NSEC ; wipe until the end of zone */
705 if(query_dname_compare(end, zone->name) == 0) {
709 walk = rbtree_next(&el->node);
710 while(walk && walk != RBTREE_NULL) {
711 cur = (struct val_neg_data*)walk;
712 /* sanity check: must be larger than start */
713 if(dname_canon_lab_cmp(cur->name, cur->labs,
714 el->name, el->labs, &m) <= 0) {
715 /* r == 0 skip original record. */
716 /* r < 0 too small! */
717 walk = rbtree_next(walk);
720 /* stop at endpoint, also data at empty nonterminals must be
721 * removed (no NSECs there) so everything between
723 if(end && dname_canon_lab_cmp(cur->name, cur->labs,
724 end, end_labs, &m) >= 0) {
727 /* this element has to be deleted, but we cannot do it
728 * now, because we are walking the tree still ... */
729 /* get the next element: */
730 next = rbtree_next(walk);
731 /* now delete the original element, this may trigger
732 * rbtree rebalances, but really, the next element is
734 * But it may trigger delete of other data and the
735 * entire zone. However, if that happens, this is done
736 * by deleting the *parents* of the element for deletion,
737 * and maybe also the entire zone if it is empty.
738 * But parents are smaller in canonical compare, thus,
739 * if a larger element exists, then it is not a parent,
740 * it cannot get deleted, the zone cannot get empty.
741 * If the next==NULL, then zone can be empty. */
743 neg_delete_data(neg, cur);
/* Insert one NSEC or NSEC3 rrset into the data tree of a zone.
 * neg:  negative cache (LRU list and memory accounting are updated).
 * zone: zone that receives the entry.
 * nsec: rrset to insert; its owner name becomes the data element name.
 * Steps visible here:
 * - The rrset is accepted only when its data is secure, or unchecked with
 *   at least one RRSIG.
 * - The closest existing parent element is located; an element with
 *   exactly the owner name is reused, otherwise neg_data_chain() creates
 *   the missing elements, which are inserted into zone->tree while
 *   neg->use grows by sizeof(struct val_neg_data) plus name length each.
 * - The element is placed at the LRU front, or touched when it was in use
 *   already.
 * - For NSEC3 rrsets the zone hash algorithm, iteration count and salt are
 *   replaced when they differ from the stored ones and the iteration count
 *   does not exceed neg->nsec3_max_iter. An empty salt is stored as NULL
 *   with length 0.
 * - wipeout() finally removes cached elements between owner and end point.
 * NOTE(review): the memcmp() on zone->nsec3_salt is reached when slen
 * equals the stored salt length, which includes the case of a NULL stored
 * salt with length 0; confirm that a zero-length memcmp on NULL is
 * acceptable for the supported platforms. */
748 void neg_insert_data(struct val_neg_cache* neg,
749 struct val_neg_zone* zone, struct ub_packed_rrset_key* nsec)
751 struct packed_rrset_data* d;
752 struct val_neg_data* parent;
753 struct val_neg_data* el;
754 uint8_t* nm = nsec->rk.dname;
755 size_t nm_len = nsec->rk.dname_len;
756 int labs = dname_count_labels(nsec->rk.dname);
758 d = (struct packed_rrset_data*)nsec->entry.data;
759 if( !(d->security == sec_status_secure ||
760 (d->security == sec_status_unchecked && d->rrsig_count > 0)))
762 log_nametypeclass(VERB_ALGO, "negcache rr",
763 nsec->rk.dname, ntohs(nsec->rk.type),
764 ntohs(nsec->rk.rrset_class));
766 /* find closest enclosing parent data that (still) exists */
767 parent = neg_closest_data_parent(zone, nm, nm_len, labs);
768 if(parent && query_dname_compare(parent->name, nm) == 0) {
769 /* perfect match already exists */
770 log_assert(parent->count > 0);
773 struct val_neg_data* p, *np;
775 /* create subtree for perfect match */
776 /* if parent exists, it is in use */
777 log_assert(!parent || parent->count > 0);
779 el = neg_data_chain(nm, nm_len, labs, parent);
781 log_err("out of memory inserting NSEC negative cache");
784 el->in_use = 0; /* set on below */
786 /* insert the list of data elements into the tree */
791 neg->use += sizeof(struct val_neg_data) + p->len;
794 (void)rbtree_insert(&zone->tree, &p->node);
795 /* last one needs proper parent pointer */
803 struct val_neg_data* p;
806 /* increase usage count of all parents */
807 for(p=el; p; p = p->parent) {
811 neg_lru_front(neg, el);
813 /* in use, bring to front, lru */
814 neg_lru_touch(neg, el);
817 /* if nsec3 store last used parameters */
818 if(ntohs(nsec->rk.type) == LDNS_RR_TYPE_NSEC3) {
822 if(nsec3_get_params(nsec, 0, &h, &it, &s, &slen) &&
823 it <= neg->nsec3_max_iter &&
824 (h != zone->nsec3_hash || it != zone->nsec3_iter ||
825 slen != zone->nsec3_saltlen ||
826 memcmp(zone->nsec3_salt, s, slen) != 0)) {
829 uint8_t* sa = memdup(s, slen);
831 free(zone->nsec3_salt);
832 zone->nsec3_salt = sa;
833 zone->nsec3_saltlen = slen;
834 zone->nsec3_iter = it;
835 zone->nsec3_hash = h;
838 free(zone->nsec3_salt);
839 zone->nsec3_salt = NULL;
840 zone->nsec3_saltlen = 0;
841 zone->nsec3_iter = it;
842 zone->nsec3_hash = h;
847 /* wipe out the cache items between NSEC start and end */
848 wipeout(neg, zone, el, nsec);
/* Scans the rrsets at index an_numrrsets up to an_numrrsets+ns_numrrsets
 * for NSEC or NSEC3 rrsets that carry at least one RRSIG. The signer name
 * of such an rrset is obtained with val_find_rrset_signer(); when a signer
 * with non-zero length is found, *dclass is set from the class of that
 * rrset (converted to host order). */
851 /** see if the reply has signed NSEC records and return the signer */
852 static uint8_t* reply_nsec_signer(struct reply_info* rep, size_t* signer_len,
856 struct packed_rrset_data* d;
858 for(i=rep->an_numrrsets; i< rep->an_numrrsets+rep->ns_numrrsets; i++){
859 if(ntohs(rep->rrsets[i]->rk.type) == LDNS_RR_TYPE_NSEC ||
860 ntohs(rep->rrsets[i]->rk.type) == LDNS_RR_TYPE_NSEC3) {
861 d = (struct packed_rrset_data*)rep->rrsets[i]->
863 /* return first signer name of first NSEC */
864 if(d->rrsig_count != 0) {
865 val_find_rrset_signer(rep->rrsets[i],
867 if(s && *signer_len) {
868 *dclass = ntohs(rep->rrsets[i]->
/* Add the NSEC records of a validated reply to the negative cache.
 * neg: negative cache; its lock is taken for the update.
 * rep: reply; ignored unless reply_has_nsec() accepts it.
 * The zone name comes from the SOA rrset in the reply or, when there is no
 * SOA, from the signer name of the NSEC rrsets. Space is reserved with
 * neg_make_space() using calc_data_need() + calc_zone_need(), the zone is
 * looked up or created and taken into use, and every NSEC rrset whose owner
 * is at or below the zone name is inserted. Only type NSEC is inserted by
 * this function. A zone whose data tree is still empty afterwards is
 * removed again. */
878 void val_neg_addreply(struct val_neg_cache* neg, struct reply_info* rep)
881 struct ub_packed_rrset_key* soa;
882 uint8_t* dname = NULL;
884 uint16_t rrset_class;
885 struct val_neg_zone* zone;
886 /* see if secure nsecs inside */
887 if(!reply_has_nsec(rep))
889 /* find the zone name in message */
890 if((soa = reply_find_soa(rep))) {
891 dname = soa->rk.dname;
892 dname_len = soa->rk.dname_len;
893 rrset_class = ntohs(soa->rk.rrset_class);
896 /* No SOA in positive (wildcard) answer. Use signer from the
897 * validated answer RRsets' signature. */
898 if(!(dname = reply_nsec_signer(rep, &dname_len, &rrset_class)))
902 log_nametypeclass(VERB_ALGO, "negcache insert for zone",
903 dname, LDNS_RR_TYPE_SOA, rrset_class);
905 /* ask for enough space to store all of it */
906 need = calc_data_need(rep) +
907 calc_zone_need(dname, dname_len);
908 lock_basic_lock(&neg->lock);
909 neg_make_space(neg, need);
911 /* find or create the zone entry */
912 zone = neg_find_zone(neg, dname, dname_len, rrset_class);
914 if(!(zone = neg_create_zone(neg, dname, dname_len,
916 lock_basic_unlock(&neg->lock);
917 log_err("out of memory adding negative zone");
921 val_neg_zone_take_inuse(zone);
923 /* insert the NSECs */
924 for(i=rep->an_numrrsets; i< rep->an_numrrsets+rep->ns_numrrsets; i++){
925 if(ntohs(rep->rrsets[i]->rk.type) != LDNS_RR_TYPE_NSEC)
927 if(!dname_subdomain_c(rep->rrsets[i]->rk.dname,
928 zone->name)) continue;
929 /* insert NSEC into this zone's tree */
930 neg_insert_data(neg, zone, rep->rrsets[i]);
932 if(zone->tree.count == 0) {
933 /* remove empty zone if inserts failed */
934 neg_delete_zone(neg, zone);
936 lock_basic_unlock(&neg->lock);
/* Thin wrapper around rbtree_find_less_equal() on zone->tree with a stack
 * key; the node that was found (exact or next smaller) is stored in *data
 * in both branches. */
940 * Lookup closest data record. For NSEC denial.
941 * @param zone: zone to look in
942 * @param qname: name to look for.
943 * @param len: length of name
944 * @param labs: labels in name
945 * @param data: data element, exact or smaller or NULL
946 * @return true if exact match.
948 static int neg_closest_data(struct val_neg_zone* zone,
949 uint8_t* qname, size_t len, int labs, struct val_neg_data** data)
951 struct val_neg_data key;
957 if(rbtree_find_less_equal(&zone->tree, &key, &r)) {
959 *data = (struct val_neg_data*)r;
963 *data = (struct val_neg_data*)r;
/* Add the NSEC and NSEC3 records of a referral to the negative cache.
 * The reply has no SOA, so the zone is identified by the signer name of the
 * NSEC/NSEC3 rrsets; that signer must be at or below zone_name, otherwise
 * the reply is discarded. As the in-code remark says, the records may not
 * have been validated yet at this point. Space is reserved with
 * neg_make_space(), the zone is looked up or created and taken into use,
 * and every NSEC or NSEC3 rrset whose owner is at or below the zone name is
 * inserted. A zone whose data tree is still empty afterwards is removed
 * again. The cache lock is held for the update. */
968 void val_neg_addreferral(struct val_neg_cache* neg, struct reply_info* rep,
975 struct val_neg_zone* zone;
976 /* no SOA in this message, find RRSIG over NSEC's signer name.
977 * note the NSEC records are maybe not validated yet */
978 signer = reply_nsec_signer(rep, &signer_len, &dclass);
981 if(!dname_subdomain_c(signer, zone_name)) {
982 /* the signer is not in the bailiwick, throw it out */
986 log_nametypeclass(VERB_ALGO, "negcache insert referral ",
987 signer, LDNS_RR_TYPE_NS, dclass);
989 /* ask for enough space to store all of it */
990 need = calc_data_need(rep) + calc_zone_need(signer, signer_len);
991 lock_basic_lock(&neg->lock);
992 neg_make_space(neg, need);
994 /* find or create the zone entry */
995 zone = neg_find_zone(neg, signer, signer_len, dclass);
997 if(!(zone = neg_create_zone(neg, signer, signer_len,
999 lock_basic_unlock(&neg->lock);
1000 log_err("out of memory adding negative zone");
1004 val_neg_zone_take_inuse(zone);
1006 /* insert the NSECs */
1007 for(i=rep->an_numrrsets; i< rep->an_numrrsets+rep->ns_numrrsets; i++){
1008 if(ntohs(rep->rrsets[i]->rk.type) != LDNS_RR_TYPE_NSEC &&
1009 ntohs(rep->rrsets[i]->rk.type) != LDNS_RR_TYPE_NSEC3)
1011 if(!dname_subdomain_c(rep->rrsets[i]->rk.dname,
1012 zone->name)) continue;
1013 /* insert NSEC into this zone's tree */
1014 neg_insert_data(neg, zone, rep->rrsets[i]);
1016 if(zone->tree.count == 0) {
1017 /* remove empty zone if inserts failed */
1018 neg_delete_zone(neg, zone);
1020 lock_basic_unlock(&neg->lock);
/* Iterates over every RR of the rrset (the count is taken from the packed
 * rrset data) and tests each one with nsec3_has_type(). */
1024 * Check that an NSEC3 rrset does not have a type set.
1025 * None of the nsec3s in a hash-collision are allowed to have the type.
1026 * (since we do not know which one is the nsec3 looked at, flags, ..., we
1027 * ignore the cached item and let it bypass negative caching).
1028 * @param k: the nsec3 rrset to check.
1029 * @param t: type to check
1030 * @return true if no RRs have the type.
1032 static int nsec3_no_type(struct ub_packed_rrset_key* k, uint16_t t)
1034 int count = (int)((struct packed_rrset_data*)k->entry.data)->count;
1036 for(i=0; i<count; i++)
1037 if(nsec3_has_type(k, i, t))
/* Flow of the visible statements: the rrset is fetched with
 * rrset_cache_lookup(); it is rejected unless its data is secure, or
 * unchecked with at least one RRSIG; the type-bitmap test uses
 * nsec_has_type() for NSEC and nsec3_no_type() for NSEC3; an accepted
 * rrset is copied into region with packed_rrset_copy_region().
 * The entry lock of the looked-up rrset is released on every exit path
 * shown here, including after the copy. */
1043 * See if rrset exists in rrset cache.
1044 * If it does, the bit is checked, and if not expired, it is returned
1045 * allocated in region.
1046 * @param rrset_cache: rrset cache
1047 * @param qname: to lookup rrset name
1048 * @param qname_len: length of qname.
1049 * @param qtype: type of rrset to lookup, host order
1050 * @param qclass: class of rrset to lookup, host order
1051 * @param flags: flags for rrset to lookup
1052 * @param region: where to alloc result
1053 * @param checkbit: if true, a bit in the nsec typemap is checked for absence.
1054 * @param checktype: which bit to check
1055 * @param now: to check ttl against
1056 * @return rrset or NULL
1058 static struct ub_packed_rrset_key*
1059 grab_nsec(struct rrset_cache* rrset_cache, uint8_t* qname, size_t qname_len,
1060 uint16_t qtype, uint16_t qclass, uint32_t flags,
1061 struct regional* region, int checkbit, uint16_t checktype,
1064 struct ub_packed_rrset_key* r, *k = rrset_cache_lookup(rrset_cache,
1065 qname, qname_len, qtype, qclass, flags, now, 0);
1066 struct packed_rrset_data* d;
1068 d = (struct packed_rrset_data*)k->entry.data;
1070 lock_rw_unlock(&k->entry.lock);
1073 /* only secure or unchecked records that have signatures. */
1074 if( ! ( d->security == sec_status_secure ||
1075 (d->security == sec_status_unchecked &&
1076 d->rrsig_count > 0) ) ) {
1077 lock_rw_unlock(&k->entry.lock);
1080 /* check if checktype is absent */
1082 (qtype == LDNS_RR_TYPE_NSEC && nsec_has_type(k, checktype)) ||
1083 (qtype == LDNS_RR_TYPE_NSEC3 && !nsec3_no_type(k, checktype))
1085 lock_rw_unlock(&k->entry.lock);
1088 /* looks OK! copy to region and return it */
1089 r = packed_rrset_copy_region(k, region, now);
1090 /* if it failed, we return the NULL */
1091 lock_rw_unlock(&k->entry.lock);
/* Flow of the visible statements, all under neg_cache->lock:
 * - The closest enclosing zone is located and parents are followed until a
 *   zone that is in use is found.
 * - Zones with NSEC3 parameters (nsec3_hash set) are not handled here.
 * - neg_closest_data() gives the exact or next smaller data element; an
 *   element that is not in use is replaced by its predecessor in the tree,
 *   and the lookup is abandoned when that one is missing or unused too.
 * - When the element name equals the zone name the rrset is requested with
 *   the PACKED_RRSET_NSEC_AT_APEX flag.
 * - The NSEC rrset is fetched with grab_nsec() without a type-bit check. */
1096 * Get best NSEC record for qname. Might be matching, covering or totally
1098 * @param neg_cache: neg cache
1099 * @param qname: to lookup rrset name
1100 * @param qname_len: length of qname.
1101 * @param qclass: class of rrset to lookup, host order
1102 * @param rrset_cache: rrset cache
1103 * @param now: to check ttl against
1104 * @param region: where to alloc result
1105 * @return rrset or NULL
1107 static struct ub_packed_rrset_key*
1108 neg_find_nsec(struct val_neg_cache* neg_cache, uint8_t* qname, size_t qname_len,
1109 uint16_t qclass, struct rrset_cache* rrset_cache, time_t now,
1110 struct regional* region)
1114 struct val_neg_zone* zone;
1115 struct val_neg_data* data;
1116 struct ub_packed_rrset_key* nsec;
1118 labs = dname_count_labels(qname);
1119 lock_basic_lock(&neg_cache->lock);
1120 zone = neg_closest_zone_parent(neg_cache, qname, qname_len, labs,
1122 while(zone && !zone->in_use)
1123 zone = zone->parent;
1125 lock_basic_unlock(&neg_cache->lock);
1129 /* NSEC only for now */
1130 if(zone->nsec3_hash) {
1131 lock_basic_unlock(&neg_cache->lock);
1135 /* ignore return value, don't care if it is an exact or smaller match */
1136 (void)neg_closest_data(zone, qname, qname_len, labs, &data);
1138 lock_basic_unlock(&neg_cache->lock);
1142 /* ENT nodes are not in use, try the previous node. If the previous node
1143 * is not in use, we don't have a useful NSEC and give up. */
1145 data = (struct val_neg_data*)rbtree_previous((rbnode_type*)data);
1146 if((rbnode_type*)data == RBTREE_NULL || !data->in_use) {
1147 lock_basic_unlock(&neg_cache->lock);
1153 if(query_dname_compare(data->name, zone->name) == 0)
1154 flags = PACKED_RRSET_NSEC_AT_APEX;
1156 nsec = grab_nsec(rrset_cache, data->name, data->len, LDNS_RR_TYPE_NSEC,
1157 zone->dclass, flags, region, 0, 0, now);
1158 lock_basic_unlock(&neg_cache->lock);
/* For a candidate name the NSEC3 hash is computed with nsec3_get_hashed()
 * using the hash algorithm, iteration count and salt stored on the zone.
 * nsec3_hash_to_b32() turns the hash into an owner name under zone->name,
 * and neg_find_data() looks for an exact match with zone->labs+1 labels
 * that is in use. When the candidate does not match, its hash is copied to
 * hashnc (it may become the next-closer hash) and the leading label is
 * removed from qname. */
1162 /** find nsec3 closest encloser in neg cache */
1163 static struct val_neg_data*
1164 neg_find_nsec3_ce(struct val_neg_zone* zone, uint8_t* qname, size_t qname_len,
1165 int qlabs, sldns_buffer* buf, uint8_t* hashnc, size_t* nclen)
1167 struct val_neg_data* data;
1168 uint8_t hashce[NSEC3_SHA_LEN];
1170 size_t celen, b32len;
1175 if(!(celen=nsec3_get_hashed(buf, qname, qname_len,
1176 zone->nsec3_hash, zone->nsec3_iter, zone->nsec3_salt,
1177 zone->nsec3_saltlen, hashce, sizeof(hashce))))
1179 if(!(b32len=nsec3_hash_to_b32(hashce, celen, zone->name,
1180 zone->len, b32, sizeof(b32))))
1183 /* lookup (exact match only) */
1184 data = neg_find_data(zone, b32, b32len, zone->labs+1);
1185 if(data && data->in_use) {
1186 /* found ce match! */
1191 memmove(hashnc, hashce, celen);
1192 dname_remove_label(&qname, &qname_len);
/* The parameters of the first RR are read with nsec3_get_params() and
 * compared to the values stored on the zone: hash algorithm, iteration
 * count, salt length and salt bytes must all be equal.
 * NOTE(review): neg_insert_data stores an empty salt as NULL with length 0,
 * so the memcmp() below can be reached with a NULL first argument and a
 * length of 0; confirm that this is acceptable for the supported
 * platforms. */
1198 /** check nsec3 parameters on nsec3 rrset with current zone values */
1200 neg_params_ok(struct val_neg_zone* zone, struct ub_packed_rrset_key* rrset)
1205 if(!nsec3_get_params(rrset, 0, &h, &it, &s, &slen))
1207 return (h == zone->nsec3_hash && it == zone->nsec3_iter &&
1208 slen == zone->nsec3_saltlen &&
1209 memcmp(zone->nsec3_salt, s, slen) == 0);
/* hashnc is converted into an owner name under zone->name (written to b32,
 * at most maxb32 bytes). neg_closest_data() then yields the exact or next
 * smaller element; when there is none and the tree is not empty, the last
 * element of the tree is used instead. Elements that are not in use are
 * skipped by following parent pointers. The NSEC3 rrset of the resulting
 * element is fetched with grab_nsec() (no type-bit check) and its
 * parameters are verified with neg_params_ok(). */
1212 /** get next closer for nsec3 proof */
1213 static struct ub_packed_rrset_key*
1214 neg_nsec3_getnc(struct val_neg_zone* zone, uint8_t* hashnc, size_t nclen,
1215 struct rrset_cache* rrset_cache, struct regional* region,
1216 time_t now, uint8_t* b32, size_t maxb32)
1218 struct ub_packed_rrset_key* nc_rrset;
1219 struct val_neg_data* data;
1222 if(!(b32len=nsec3_hash_to_b32(hashnc, nclen, zone->name,
1223 zone->len, b32, maxb32)))
1225 (void)neg_closest_data(zone, b32, b32len, zone->labs+1, &data);
1226 if(!data && zone->tree.count != 0) {
1227 /* could be before the first entry ; return the last
1228 * entry (possibly the rollover nsec3 at end) */
1229 data = (struct val_neg_data*)rbtree_last(&zone->tree);
1231 while(data && !data->in_use)
1232 data = data->parent;
1235 /* got a data element in tree, grab it */
1236 nc_rrset = grab_nsec(rrset_cache, data->name, data->len,
1237 LDNS_RR_TYPE_NSEC3, zone->dclass, 0, region, 0, 0, now);
1240 if(!neg_params_ok(zone, nc_rrset))
/* Tries to build a DS denial message from cached NSEC3 records.
 * Flow of the visible statements:
 * - Zones without NSEC3 parameters (nsec3_hash not set) yield NULL.
 * - neg_find_nsec3_ce() locates the closest encloser; its NSEC3 rrset is
 *   fetched with grab_nsec() with the type-bit check enabled for DS, and
 *   its parameters must pass neg_params_ok().
 * - Exact match case: the first NSEC3 must have neither SOA nor DS but must
 *   have NS; a message with room for one rrset is created and the closest
 *   encloser rrset is added to its authority section.
 * - Otherwise the next-closer NSEC3 is fetched with neg_nsec3_getnc(); it
 *   must pass neg_params_ok(), have the opt-out flag, and cover the
 *   next-closer hash according to nsec3_covers(). A message with room for
 *   three rrsets is created and both rrsets are added.
 * The rrsets are added with a time argument of 0 because their TTLs were
 * adjusted in grab_nsec already (see the in-code remarks). */
1245 /** neg cache nsec3 proof procedure*/
1246 static struct dns_msg*
1247 neg_nsec3_proof_ds(struct val_neg_zone* zone, uint8_t* qname, size_t qname_len,
1248 int qlabs, sldns_buffer* buf, struct rrset_cache* rrset_cache,
1249 struct regional* region, time_t now, uint8_t* topname)
1251 struct dns_msg* msg;
1252 struct val_neg_data* data;
1253 uint8_t hashnc[NSEC3_SHA_LEN];
1255 struct ub_packed_rrset_key* ce_rrset, *nc_rrset;
1256 struct nsec3_cached_hash c;
1257 uint8_t nc_b32[257];
1259 /* for NSEC3 ; determine the closest encloser for which we
1260 * can find an exact match. Remember the hashed lower name,
1261 * since that is the one we need a closest match for.
1262 * If we find a match straight away, then it becomes NODATA.
1263 * Otherwise, NXDOMAIN or if OPTOUT, an insecure delegation.
1264 * Also check that parameters are the same on closest encloser
1265 * and on closest match.
1267 if(!zone->nsec3_hash)
1268 return NULL; /* not nsec3 zone */
1270 if(!(data=neg_find_nsec3_ce(zone, qname, qname_len, qlabs, buf,
1275 /* grab the ce rrset */
1276 ce_rrset = grab_nsec(rrset_cache, data->name, data->len,
1277 LDNS_RR_TYPE_NSEC3, zone->dclass, 0, region, 1,
1278 LDNS_RR_TYPE_DS, now);
1281 if(!neg_params_ok(zone, ce_rrset))
1285 /* exact match, just check the type bits */
1286 /* need: -SOA, -DS, +NS */
1287 if(nsec3_has_type(ce_rrset, 0, LDNS_RR_TYPE_SOA) ||
1288 nsec3_has_type(ce_rrset, 0, LDNS_RR_TYPE_DS) ||
1289 !nsec3_has_type(ce_rrset, 0, LDNS_RR_TYPE_NS))
1291 if(!(msg = dns_msg_create(qname, qname_len,
1292 LDNS_RR_TYPE_DS, zone->dclass, region, 1)))
1294 /* TTL reduced in grab_nsec */
1295 if(!dns_msg_authadd(msg, region, ce_rrset, 0))
1300 /* optout is not allowed without knowing the trust-anchor in use,
1301 * otherwise the optout could spoof away that anchor */
1305 /* if there is no exact match, it must be in an optout span
1306 * (an existing DS implies an NSEC3 must exist) */
1307 nc_rrset = neg_nsec3_getnc(zone, hashnc, nclen, rrset_cache,
1308 region, now, nc_b32, sizeof(nc_b32));
1311 if(!neg_params_ok(zone, nc_rrset))
1313 if(!nsec3_has_optout(nc_rrset, 0))
1318 c.b32_len = (size_t)nc_b32[0];
1319 if(nsec3_covers(zone->name, &c, nc_rrset, 0, buf)) {
1320 /* nc_rrset covers the next closer name.
1321 * ce_rrset equals a closer encloser.
1322 * nc_rrset is optout.
1323 * No need to check wildcard for type DS */
1324 /* capacity=3: ce + nc + soa(if needed) */
1325 if(!(msg = dns_msg_create(qname, qname_len,
1326 LDNS_RR_TYPE_DS, zone->dclass, region, 3)))
1328 /* now=0 because TTL was reduced in grab_nsec */
1329 if(!dns_msg_authadd(msg, region, ce_rrset, 0))
1331 if(!dns_msg_authadd(msg, region, nc_rrset, 0))
1339 * Add SOA record for external responses.
1340 * @param rrset_cache: to look into.
1341 * @param now: current time.
1342 * @param region: where to perform the allocation
1343 * @param msg: current msg with NSEC.
1344 * @param zone: val_neg_zone if we have one.
1345 * @return false on lookup or alloc failure.
1347 static int add_soa(struct rrset_cache* rrset_cache, time_t now,
1348 struct regional* region, struct dns_msg* msg, struct val_neg_zone* zone)
1350 struct ub_packed_rrset_key* soa;
/* NOTE(review): callers in this file also pass zone == NULL, so this
 * assignment is presumably guarded by a zone check on lines that are not
 * part of this excerpt -- confirm against the full file. */
1357 dclass = zone->dclass;
1359 /* Assumes the signer is the zone SOA to add */
1360 nm = reply_nsec_signer(msg->rep, &nmlen, &dclass);
/* look up the SOA rrset stored with the PACKED_RRSET_SOA_NEG flag; the
 * final 0 corresponds to the argument annotated "read only" on the other
 * rrset_cache_lookup call in this file */
1364 soa = rrset_cache_lookup(rrset_cache, nm, nmlen, LDNS_RR_TYPE_SOA,
1365 dclass, PACKED_RRSET_SOA_NEG, now, 0);
/* now is passed here, unlike the NSEC additions elsewhere in this file
 * that pass 0 because grab_nsec already reduced the TTL; presumably the
 * rrset from the cache lookup still needs that adjustment -- confirm */
1368 if(!dns_msg_authadd(msg, region, soa, now)) {
/* the entry lock is released on the failure path ... */
1369 lock_rw_unlock(&soa->entry.lock);
/* ... and also after the rrset was added successfully */
1372 lock_rw_unlock(&soa->entry.lock);
/**
 * Create a reply for the query from the negative cache, if possible.
 * The cached NSEC records are tried first: a matching NSEC is used for a
 * No Data answer, an NSEC that proves a name error is completed with
 * either a wildcard rrset from the rrset cache (synthesised answer) or
 * an NSEC for the wildcard name. After the NSEC branches only DS queries
 * proceed to the NSEC3 neg cache, using the closest zone for the parent
 * of the query name.
 * @param neg: negative cache; its lock is held around the zone lookup
 *	and around the statistics counter updates.
 * @param qinfo: the query; qname, qname_len, qtype and qclass are used.
 * @param region: region for the message and the copied rrsets.
 * @param rrset_cache: rrset cache to look in.
 * @param buf: buffer passed on to the NSEC3 proof.
 * @param now: current time.
 * @param addsoa: if true, add_soa is called to add a SOA to the message.
 * @param topname: if not NULL, qname is asserted to be a subdomain of it,
 *	and the zone found for the NSEC3 lookup is checked against it.
 * @param cfg: config; aggressive_nsec is read.
 * @return NOTE(review): the return type line and the return statements
 *	are not part of this excerpt; msg is a struct dns_msg*.
 */
1377 val_neg_getmsg(struct val_neg_cache* neg, struct query_info* qinfo,
1378 struct regional* region, struct rrset_cache* rrset_cache,
1379 sldns_buffer* buf, time_t now, int addsoa, uint8_t* topname,
1380 struct config_file* cfg)
1382 struct dns_msg* msg;
1383 struct ub_packed_rrset_key* nsec; /* qname matching/covering nsec */
1384 struct ub_packed_rrset_key* wcrr; /* wildcard record or nsec */
1385 uint8_t* nodata_wc = NULL;
1388 uint8_t wc_ce[LDNS_MAX_DOMAINLEN+3];
1389 struct query_info wc_qinfo;
1390 struct ub_packed_rrset_key* cache_wc;
1391 struct packed_rrset_data* wcrr_data;
1392 int rcode = LDNS_RCODE_NOERROR;
1396 struct val_neg_zone* zone;
1398 /* only for DS queries when aggressive use of NSEC is disabled */
1399 if(qinfo->qtype != LDNS_RR_TYPE_DS && !cfg->aggressive_nsec)
1401 log_assert(!topname || dname_subdomain_c(qinfo->qname, topname));
1403 /* Get best available NSEC for qname */
1404 nsec = neg_find_nsec(neg, qinfo->qname, qinfo->qname_len, qinfo->qclass,
1405 rrset_cache, now, region);
1407 /* Matching NSEC, use to generate No Data answer. Not creating answers
1408 * yet for No Data proven using wildcard. */
1409 if(nsec && nsec_proves_nodata(nsec, qinfo, &nodata_wc) && !nodata_wc) {
1410 if(!(msg = dns_msg_create(qinfo->qname, qinfo->qname_len,
1411 qinfo->qtype, qinfo->qclass, region, 2)))
1413 if(!dns_msg_authadd(msg, region, nsec, 0))
1415 if(addsoa && !add_soa(rrset_cache, now, region, msg, NULL))
/* the statistics counter is updated while holding neg->lock */
1418 lock_basic_lock(&neg->lock);
1419 neg->num_neg_cache_noerror++;
1420 lock_basic_unlock(&neg->lock);
1422 } else if(nsec && val_nsec_proves_name_error(nsec, qinfo->qname)) {
1423 if(!(msg = dns_msg_create(qinfo->qname, qinfo->qname_len,
1424 qinfo->qtype, qinfo->qclass, region, 3)))
1426 if(!(ce = nsec_closest_encloser(qinfo->qname, nsec)))
1428 dname_count_size_labels(ce, &ce_len);
1430 /* No extra NSEC required if both nameerror qname and
1431 * nodata *.ce. are proven already. */
1432 if(!nodata_wc || query_dname_compare(nodata_wc, ce) != 0) {
1433 /* Qname proven non existing, get wildcard record for
1434 * QTYPE or NSEC covering or matching wildcard. */
1436 /* Num labels in ce is always smaller than in qname,
1437 * therefore adding the wildcard label cannot overflow
1440 wc_ce[1] = (uint8_t)'*';
1441 memmove(wc_ce+2, ce, ce_len);
1442 wc_qinfo.qname = wc_ce;
1443 wc_qinfo.qname_len = ce_len + 2;
1444 wc_qinfo.qtype = qinfo->qtype;
1447 if((cache_wc = rrset_cache_lookup(rrset_cache, wc_qinfo.qname,
1448 wc_qinfo.qname_len, wc_qinfo.qtype,
1449 qinfo->qclass, 0/*flags*/, now, 0/*read only*/))) {
1450 /* Synthesize wildcard answer */
1451 wcrr_data = (struct packed_rrset_data*)cache_wc->entry.data;
/* the cached wildcard rrset passes this check only if it is secure, or
 * unchecked but carrying at least one RRSIG; otherwise the entry lock
 * is released */
1452 if(!(wcrr_data->security == sec_status_secure ||
1453 (wcrr_data->security == sec_status_unchecked &&
1454 wcrr_data->rrsig_count > 0))) {
1455 lock_rw_unlock(&cache_wc->entry.lock);
1458 if(!(wcrr = packed_rrset_copy_region(cache_wc,
1460 lock_rw_unlock(&cache_wc->entry.lock);
1463 lock_rw_unlock(&cache_wc->entry.lock);
/* the copy gets the query name as owner name before it is added to the
 * answer */
1464 wcrr->rk.dname = qinfo->qname;
1465 wcrr->rk.dname_len = qinfo->qname_len;
1466 if(!dns_msg_ansadd(msg, region, wcrr, 0))
1468 /* No SOA needed for wildcard synthesised
1472 /* Get wildcard NSEC for possible non existence
1474 if(!(wcrr = neg_find_nsec(neg, wc_qinfo.qname,
1475 wc_qinfo.qname_len, qinfo->qclass,
1476 rrset_cache, now, region)))
1480 if(val_nsec_proves_name_error(wcrr, wc_ce))
1481 rcode = LDNS_RCODE_NXDOMAIN;
1482 else if(!nsec_proves_nodata(wcrr, &wc_qinfo,
1483 &nodata_wc) || nodata_wc)
1484 /* &nodata_wc shouldn't be set, wc_qinfo
1485 * already contains wildcard domain. */
1486 /* NSEC doesn't prove anything for
1489 if(query_dname_compare(wcrr->rk.dname,
1490 nsec->rk.dname) != 0)
1491 if(!dns_msg_authadd(msg, region, wcrr, 0))
1496 if(!dns_msg_authadd(msg, region, nsec, 0))
1498 if(addsoa && !add_soa(rrset_cache, now, region, msg, NULL))
1501 /* Increment statistic counters */
1502 lock_basic_lock(&neg->lock);
1503 if(rcode == LDNS_RCODE_NOERROR)
1504 neg->num_neg_cache_noerror++;
1505 else if(rcode == LDNS_RCODE_NXDOMAIN)
1506 neg->num_neg_cache_nxdomain++;
1507 lock_basic_unlock(&neg->lock);
1509 FLAGS_SET_RCODE(msg->rep->flags, rcode);
1513 /* No aggressive use of NSEC3 for now, only proceed for DS types. */
1514 if(qinfo->qtype != LDNS_RR_TYPE_DS){
1517 /* check NSEC3 neg cache for type DS */
1518 /* need to look one zone higher for DS type */
1519 zname = qinfo->qname;
1520 zname_len = qinfo->qname_len;
1521 dname_remove_label(&zname, &zname_len);
1522 zname_labs = dname_count_labels(zname);
1524 /* lookup closest zone; neg->lock is taken here and released on
 * each of the visible paths below */
1525 lock_basic_lock(&neg->lock);
1526 zone = neg_closest_zone_parent(neg, zname, zname_len, zname_labs,
1528 while(zone && !zone->in_use)
1529 zone = zone->parent;
1530 /* check that the zone is not too high up so that we do not pick data
1531 * out of a zone that is above the last-seen key (or trust-anchor). */
1532 if(zone && topname) {
1533 if(!dname_subdomain_c(zone->name, topname))
1537 lock_basic_unlock(&neg->lock);
/* zname is qname with one label removed, so zname_labs+1 is the label
 * count of qname itself */
1541 msg = neg_nsec3_proof_ds(zone, qinfo->qname, qinfo->qname_len,
1542 zname_labs+1, buf, rrset_cache, region, now, topname);
1543 if(msg && addsoa && !add_soa(rrset_cache, now, region, msg, zone)) {
1544 lock_basic_unlock(&neg->lock);
1547 lock_basic_unlock(&neg->lock);