2 * services/cache/infra.c - infrastructure cache, server rtt and capabilities
4 * Copyright (c) 2007, NLnet Labs. All rights reserved.
6 * This software is open source.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
39 * This file contains the infrastructure cache.
43 #include "services/cache/infra.h"
44 #include "util/storage/slabhash.h"
45 #include "util/storage/lookup3.h"
46 #include "util/data/dname.h"
48 #include "util/net_help.h"
49 #include "util/config_file.h"
50 #include "iterator/iterator.h"
52 /** Timeout when only a single probe query per IP is allowed. */
53 #define PROBE_MAXRTO 12000 /* in msec */
55 /** number of timeouts for a query type after which the domain can be blocked;
56 * even if another type has completely maxed out the rtt, a different type
57 * can still send this number of packets (until those all time out too) */
58 #define TIMEOUT_COUNT_MAX 3
/**
 * Size function handed to slabhash_create() for the infra host cache.
 * @param k: entry key, a struct infra_key.
 * @param d: entry data, unused.
 * @return the size of the key struct plus one struct infra_data, the zone
 *	name bytes (key->namelen) and whatever lock_get_mem() reports for the
 *	entry lock.
 */
61 infra_sizefunc(void* k, void* ATTR_UNUSED(d))
63 struct infra_key* key = (struct infra_key*)k;
64 return sizeof(*key) + sizeof(struct infra_data) + key->namelen
65 + lock_get_mem(&key->entry.lock);
/**
 * Comparison function handed to slabhash_create() for the infra host cache.
 * Both arguments are struct infra_key pointers. The visible steps compare
 * the socket addresses with sockaddr_cmp(), then the zone name lengths, and
 * last the zone names themselves with query_dname_compare().
 * NOTE(review): the returns taken on an address or length mismatch are on
 * lines not shown in this listing - confirm the resulting ordering there.
 */
69 infra_compfunc(void* key1, void* key2)
71 struct infra_key* k1 = (struct infra_key*)key1;
72 struct infra_key* k2 = (struct infra_key*)key2;
73 int r = sockaddr_cmp(&k1->addr, k1->addrlen, &k2->addr, k2->addrlen);
76 if(k1->namelen != k2->namelen) {
77 if(k1->namelen < k2->namelen)
81 return query_dname_compare(k1->zonename, k2->zonename);
/**
 * Key deletion function handed to slabhash_create().
 * @param k: a struct infra_key; its entry lock is destroyed here.
 * @param arg: unused.
 * NOTE(review): only the lock teardown is visible in this listing; the
 * release of the key memory is presumably on lines not shown - verify.
 */
85 infra_delkeyfunc(void* k, void* ATTR_UNUSED(arg))
87 struct infra_key* key = (struct infra_key*)k;
90 lock_rw_destroy(&key->entry.lock);
/**
 * Data deletion function handed to slabhash_create().
 * @param d: entry data, treated as a struct infra_data.
 * @param arg: unused.
 * NOTE(review): the statement that releases data is not visible in this
 * listing - confirm it exists.
 */
96 infra_deldatafunc(void* d, void* ATTR_UNUSED(arg))
98 struct infra_data* data = (struct infra_data*)d;
/**
 * Create the infra cache from configuration.
 * Allocates a zeroed struct infra_cache, budgets the host table at
 * cfg->infra_cache_numhosts entries of (key + data + INFRA_BYTES_NAME)
 * bytes, creates the slabhash with cfg->infra_cache_slabs slabs and the
 * infra_* callbacks, and copies cfg->host_ttl.
 * @param cfg: configuration that supplies the sizes and the host ttl.
 * NOTE(review): no check of the calloc() result is visible before
 * infra->hosts is assigned below; if calloc() fails this dereferences
 * NULL - confirm and add a check.
 */
103 infra_create(struct config_file* cfg)
105 struct infra_cache* infra = (struct infra_cache*)calloc(1,
106 sizeof(struct infra_cache));
107 size_t maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+
108 sizeof(struct infra_data)+INFRA_BYTES_NAME);
109 infra->hosts = slabhash_create(cfg->infra_cache_slabs,
110 INFRA_HOST_STARTSIZE, maxmem, &infra_sizefunc, &infra_compfunc,
111 &infra_delkeyfunc, &infra_deldatafunc, NULL);
116 infra->host_ttl = cfg->host_ttl;
/**
 * Delete the infra cache.
 * @param infra: cache whose host table is deleted with slabhash_delete().
 * NOTE(review): a NULL guard and the release of infra itself are not
 * visible in this listing - confirm on the lines not shown.
 */
121 infra_delete(struct infra_cache* infra)
125 slabhash_delete(infra->hosts);
/**
 * Apply a new configuration to an existing infra cache.
 * Copies cfg->host_ttl, recomputes the memory budget with the same formula
 * infra_create() uses, and when that budget or the slab count differs from
 * the current host table, builds a replacement with infra_create().
 * @param infra: the current cache.
 * @param cfg: the new configuration.
 * NOTE(review): the guard on the early "return infra_create(cfg)" is not
 * visible here (presumably infra being NULL), nor is the disposal of the
 * old cache before it is replaced - confirm both.
 */
130 infra_adjust(struct infra_cache* infra, struct config_file* cfg)
134 return infra_create(cfg);
135 infra->host_ttl = cfg->host_ttl;
136 maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+
137 sizeof(struct infra_data)+INFRA_BYTES_NAME);
138 if(maxmem != slabhash_get_size(infra->hosts) ||
139 cfg->infra_cache_slabs != infra->hosts->size) {
141 infra = infra_create(cfg);
146 /** calculate the hash value for a host key */
/* Only the address family, the port and the address bytes are hashed, each
 * chained through hashlittle() starting from the initial value 0xab. */
148 hash_addr(struct sockaddr_storage* addr, socklen_t addrlen)
150 hashvalue_t h = 0xab;
151 /* select the pieces to hash, some OS have changing data inside */
152 if(addr_is_ip6(addr, addrlen)) {
/* IPv6: family, port and INET6_SIZE bytes of address */
153 struct sockaddr_in6* in6 = (struct sockaddr_in6*)addr;
154 h = hashlittle(&in6->sin6_family, sizeof(in6->sin6_family), h);
155 h = hashlittle(&in6->sin6_port, sizeof(in6->sin6_port), h);
156 h = hashlittle(&in6->sin6_addr, INET6_SIZE, h);
/* the lines below read addr as a struct sockaddr_in: family, port and
 * INET_SIZE bytes of address */
158 struct sockaddr_in* in = (struct sockaddr_in*)addr;
159 h = hashlittle(&in->sin_family, sizeof(in->sin_family), h);
160 h = hashlittle(&in->sin_port, sizeof(in->sin_port), h);
161 h = hashlittle(&in->sin_addr, INET_SIZE, h);
166 /** calculate infra hash for a key */
/* The address hash from hash_addr() is passed as the second argument to
 * dname_query_hash() together with the zone name, so both the address and
 * the name contribute to the result. */
168 hash_infra(struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* name)
170 return dname_query_hash(name, hash_addr(addr, addrlen));
173 /** lookup version that does not check host ttl (you check it) */
/* Fills a local key k from addr and name, hashes it with hash_infra() and
 * passes it to slabhash_lookup() on infra->hosts.
 * wr is forwarded unchanged to slabhash_lookup(); callers in this file that
 * pass 1 go on to modify the returned entry.
 * Callers in this file release a returned entry with lock_rw_unlock(). */
174 struct lruhash_entry*
175 infra_lookup_nottl(struct infra_cache* infra, struct sockaddr_storage* addr,
176 socklen_t addrlen, uint8_t* name, size_t namelen, int wr)
180 memcpy(&k.addr, addr, addrlen);
183 k.entry.hash = hash_infra(addr, addrlen, name);
/* the temporary entry points back at the key itself */
184 k.entry.key = (void*)&k;
186 return slabhash_lookup(infra->hosts, k.entry.hash, &k, wr);
189 /** init the data elements */
/* Resets the data of entry e: the ttl becomes timenow + infra->host_ttl,
 * the rtt is re-initialised with rtt_init(), and the EDNS, probe delay,
 * lameness and per-type timeout fields visible below are set to 0. */
191 data_entry_init(struct infra_cache* infra, struct lruhash_entry* e,
194 struct infra_data* data = (struct infra_data*)e->data;
195 data->ttl = timenow + infra->host_ttl;
196 rtt_init(&data->rtt);
197 data->edns_version = 0;
198 data->edns_lame_known = 0;
199 data->probedelay = 0;
200 data->isdnsseclame = 0;
202 data->lame_type_A = 0;
203 data->lame_other = 0;
205 data->timeout_AAAA = 0;
206 data->timeout_other = 0;
210 * Create and init a new entry for a host
211 * @param infra: infra structure with config parameters.
212 * @param addr: host address.
213 * @param addrlen: length of addr.
214 * @param name: name of zone
215 * @param namelen: length of name.
216 * @param tm: time now.
217 * @return: the new entry or NULL on malloc failure.
219 static struct lruhash_entry*
220 new_entry(struct infra_cache* infra, struct sockaddr_storage* addr,
221 socklen_t addrlen, uint8_t* name, size_t namelen, uint32_t tm)
223 struct infra_data* data;
/* key, data and the zone name copy are three separate allocations.
 * NOTE(review): the failure handling for them is on lines not visible in
 * this listing. */
224 struct infra_key* key = (struct infra_key*)malloc(sizeof(*key));
227 data = (struct infra_data*)malloc(sizeof(struct infra_data));
232 key->zonename = memdup(name, namelen);
238 key->namelen = namelen;
239 lock_rw_init(&key->entry.lock);
240 key->entry.hash = hash_infra(addr, addrlen, name);
/* the embedded entry points back at its own key and at the new data */
241 key->entry.key = (void*)key;
242 key->entry.data = (void*)data;
243 key->addrlen = addrlen;
244 memcpy(&key->addr, addr, addrlen);
/* fill data with defaults and a ttl based on tm */
245 data_entry_init(infra, &key->entry, tm);
/**
 * Look up a host (address plus zone name) and report its EDNS state and
 * timeout, creating the entry with new_entry() when needed.
 * @param infra: the infra cache.
 * @param addr, addrlen: host address.
 * @param nm, nmlen: zone name.
 * @param timenow: current time, compared against the entry ttl.
 * @param edns_vs: set to the stored edns_version.
 * @param edns_lame_known: set to the stored edns_lame_known flag.
 * @param to: set to rtt_timeout() of the stored rtt.
 * NOTE(review): the return statements and several branch lines are not
 * visible in this listing.
 */
250 infra_host(struct infra_cache* infra, struct sockaddr_storage* addr,
251 socklen_t addrlen, uint8_t* nm, size_t nmlen, uint32_t timenow,
252 int* edns_vs, uint8_t* edns_lame_known, int* to)
254 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
256 struct infra_data* data;
258 if(e && ((struct infra_data*)e->data)->ttl < timenow) {
259 /* it expired, try to reuse existing entry */
/* remember the rto and the timeout counters before the lock is released;
 * they are put back below when old is at or above
 * USEFUL_SERVER_TOP_TIMEOUT */
260 int old = ((struct infra_data*)e->data)->rtt.rto;
261 uint8_t tA = ((struct infra_data*)e->data)->timeout_A;
262 uint8_t tAAAA = ((struct infra_data*)e->data)->timeout_AAAA;
263 uint8_t tother = ((struct infra_data*)e->data)->timeout_other;
/* unlock and look the entry up again with wr=1 */
264 lock_rw_unlock(&e->lock);
265 e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1);
267 /* if its still there we have a writelock, init */
269 /* do not touch lameness, it may be valid still */
270 data_entry_init(infra, e, timenow);
272 /* TOP_TIMEOUT remains on reuse */
273 if(old >= USEFUL_SERVER_TOP_TIMEOUT) {
274 ((struct infra_data*)e->data)->rtt.rto
275 = USEFUL_SERVER_TOP_TIMEOUT;
276 ((struct infra_data*)e->data)->timeout_A = tA;
277 ((struct infra_data*)e->data)->timeout_AAAA = tAAAA;
278 ((struct infra_data*)e->data)->timeout_other = tother;
283 /* insert new entry */
284 if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
/* the outputs are read from the fresh entry before it is handed to the
 * hash table */
286 data = (struct infra_data*)e->data;
287 *edns_vs = data->edns_version;
288 *edns_lame_known = data->edns_lame_known;
289 *to = rtt_timeout(&data->rtt);
290 slabhash_insert(infra->hosts, e->hash, e, data, NULL);
293 /* use existing entry */
294 data = (struct infra_data*)e->data;
295 *edns_vs = data->edns_version;
296 *edns_lame_known = data->edns_lame_known;
297 *to = rtt_timeout(&data->rtt);
/* timeout at or above PROBE_MAXRTO and at least four times the
 * rtt_notimeout() value: record a probe delay on the entry */
298 if(*to >= PROBE_MAXRTO && rtt_notimeout(&data->rtt)*4 <= *to) {
299 /* delay other queries, this is the probe query */
301 lock_rw_unlock(&e->lock);
302 e = infra_lookup_nottl(infra, addr,addrlen,nm,nmlen, 1);
303 if(!e) { /* flushed from cache real fast, no use to
304 allocate just for the probedelay */
307 data = (struct infra_data*)e->data;
309 /* add 999 to round up the timeout value from msec to sec,
310 * then add a whole second so it is certain that this probe
311 * has timed out before the next is allowed */
312 data->probedelay = timenow + ((*to)+1999)/1000;
314 lock_rw_unlock(&e->lock);
/**
 * Mark a host lame for a zone.
 * Looks the entry up with wr=1, creates it with new_entry() when needed
 * (logging "set_lame: malloc failure" if that fails), re-initialises it when
 * its ttl has passed, and then merges the lameness flags into the data.
 * @param infra: the infra cache.
 * @param addr, addrlen: host address.
 * @param nm, nmlen: zone name.
 * @param timenow: current time, compared against the entry ttl.
 * @param dnsseclame, reclame, qtype: when neither dnsseclame nor reclame is
 *	set, qtype selects lame_type_A (type A) or lame_other (any other
 *	type).
 * NOTE(review): the conditions guarding isdnsseclame, any update for
 * reclame, and the return statements are on lines not visible here.
 */
319 infra_set_lame(struct infra_cache* infra, struct sockaddr_storage* addr,
320 socklen_t addrlen, uint8_t* nm, size_t nmlen, uint32_t timenow,
321 int dnsseclame, int reclame, uint16_t qtype)
323 struct infra_data* data;
324 struct lruhash_entry* e;
325 int needtoinsert = 0;
326 e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1);
329 if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow))) {
330 log_err("set_lame: malloc failure");
334 } else if( ((struct infra_data*)e->data)->ttl < timenow) {
335 /* expired, reuse existing entry */
336 data_entry_init(infra, e, timenow);
338 /* got an entry, now set the zone lame */
339 data = (struct infra_data*)e->data;
340 /* merge data (if any) */
342 data->isdnsseclame = 1;
345 if(!dnsseclame && !reclame && qtype == LDNS_RR_TYPE_A)
346 data->lame_type_A = 1;
347 if(!dnsseclame && !reclame && qtype != LDNS_RR_TYPE_A)
348 data->lame_other = 1;
/* either insert the entry into the table or just release its lock.
 * NOTE(review): the condition pairing with the else below is not visible,
 * presumably needtoinsert - confirm. */
351 slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
352 else { lock_rw_unlock(&e->lock); }
/**
 * Keep an existing host entry from staying at the maximum timeout.
 * Returns without doing anything when the entry does not exist. Otherwise,
 * when the stored rto is at or above RTT_MAX_TIMEOUT it is lowered to
 * RTT_MAX_TIMEOUT-1000, and the entry lock is released.
 * @param infra: the infra cache.
 * @param addr, addrlen: host address.
 * @param nm: zone name.
 * NOTE(review): the rest of the parameter list and the arguments of the
 * lookup call are on lines not visible in this listing.
 */
357 infra_update_tcp_works(struct infra_cache* infra,
358 struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm,
361 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
363 struct infra_data* data;
365 return; /* doesn't exist */
366 data = (struct infra_data*)e->data;
367 if(data->rtt.rto >= RTT_MAX_TIMEOUT)
368 /* do not disqualify this server altogether, it is better
370 data->rtt.rto = RTT_MAX_TIMEOUT-1000;
371 lock_rw_unlock(&e->lock);
/**
 * Record the outcome of a query to a host in its rtt data.
 * The entry is created with new_entry() when needed and re-initialised
 * when its ttl has passed.
 * @param infra: the infra cache.
 * @param addr, addrlen: host address.
 * @param nm, nmlen: zone name.
 * @param qtype: query type; selects which timeout counter (A, AAAA or
 *	other) is touched.
 * @param roundtrip: -1 means the query timed out: rtt_lost() is applied
 *	with orig_rtt and the counter for qtype is raised, capped at
 *	TIMEOUT_COUNT_MAX. Any other value is fed to rtt_update(), the probe
 *	delay is cleared and the counter for qtype is reset to 0.
 * @param orig_rtt: passed to rtt_lost() on a timeout.
 * @param timenow: current time, compared against the entry ttl.
 * NOTE(review): the return statements and the use of the final rto check
 * are on lines not visible in this listing.
 */
375 infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr,
376 socklen_t addrlen, uint8_t* nm, size_t nmlen, int qtype,
377 int roundtrip, int orig_rtt, uint32_t timenow)
379 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
381 struct infra_data* data;
382 int needtoinsert = 0;
385 if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
388 } else if(((struct infra_data*)e->data)->ttl < timenow) {
389 data_entry_init(infra, e, timenow);
391 /* have an entry, update the rtt */
392 data = (struct infra_data*)e->data;
393 if(roundtrip == -1) {
/* timeout: back off the rtt and count it against the query type */
394 rtt_lost(&data->rtt, orig_rtt);
395 if(qtype == LDNS_RR_TYPE_A) {
396 if(data->timeout_A < TIMEOUT_COUNT_MAX)
398 } else if(qtype == LDNS_RR_TYPE_AAAA) {
399 if(data->timeout_AAAA < TIMEOUT_COUNT_MAX)
400 data->timeout_AAAA++;
402 if(data->timeout_other < TIMEOUT_COUNT_MAX)
403 data->timeout_other++;
406 /* if we got a reply, but the old timeout was above server
407 * selection height, delete the timeout so the server is
408 * fully available again */
409 if(rtt_unclamped(&data->rtt) >= USEFUL_SERVER_TOP_TIMEOUT)
410 rtt_init(&data->rtt);
411 rtt_update(&data->rtt, roundtrip);
412 data->probedelay = 0;
413 if(qtype == LDNS_RR_TYPE_A)
415 else if(qtype == LDNS_RR_TYPE_AAAA)
416 data->timeout_AAAA = 0;
417 else data->timeout_other = 0;
419 if(data->rtt.rto > 0)
/* either insert the entry into the table or just release its lock.
 * NOTE(review): the condition pairing with the else below is not visible,
 * presumably needtoinsert - confirm. */
423 slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
424 else { lock_rw_unlock(&e->lock); }
/**
 * Copy out the rtt information, probe delay and timeout counters of a host
 * entry that has not expired (data->ttl >= timenow).
 * @param infra: the infra cache.
 * @param addr, addrlen: host address.
 * @param nm, nmlen: zone name.
 * @param rtt: receives a copy of the stored struct rtt_info.
 * @param delay: set to probedelay - timenow while timenow is before the
 *	stored probedelay.
 * @param timenow: current time.
 * @param tA, tAAAA, tother: receive the stored timeout counters.
 * NOTE(review): the return value, the not-found path and the value of
 * *delay in the other case are on lines not visible in this listing; the
 * local ttl holds the remaining lifetime and is presumably what is
 * returned - confirm.
 */
428 int infra_get_host_rto(struct infra_cache* infra,
429 struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm,
430 size_t nmlen, struct rtt_info* rtt, int* delay, uint32_t timenow,
431 int* tA, int* tAAAA, int* tother)
433 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
435 struct infra_data* data;
438 data = (struct infra_data*)e->data;
439 if(data->ttl >= timenow) {
440 ttl = (int)(data->ttl - timenow);
441 memmove(rtt, &data->rtt, sizeof(*rtt));
442 if(timenow < data->probedelay)
443 *delay = (int)(data->probedelay - timenow);
446 *tA = (int)data->timeout_A;
447 *tAAAA = (int)data->timeout_AAAA;
448 *tother = (int)data->timeout_other;
449 lock_rw_unlock(&e->lock);
/**
 * Store the EDNS version observed for a host.
 * The entry is created with new_entry() when needed and re-initialised
 * when its ttl has passed. The version is then stored and edns_lame_known
 * set to 1, except when edns_version is -1 while the entry already holds a
 * known version other than -1.
 * @param infra: the infra cache.
 * @param addr, addrlen: host address.
 * @param nm, nmlen: zone name.
 * @param edns_version: version to store; -1 is the "noEDNS" case named in
 *	the comment below.
 * NOTE(review): the rest of the parameter list and the return statements
 * are on lines not visible in this listing.
 */
454 infra_edns_update(struct infra_cache* infra, struct sockaddr_storage* addr,
455 socklen_t addrlen, uint8_t* nm, size_t nmlen, int edns_version,
458 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
460 struct infra_data* data;
461 int needtoinsert = 0;
463 if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
466 } else if(((struct infra_data*)e->data)->ttl < timenow) {
467 data_entry_init(infra, e, timenow);
469 /* have an entry, update the rtt, and the ttl */
470 data = (struct infra_data*)e->data;
471 /* do not update if noEDNS and stored is yesEDNS */
472 if(!(edns_version == -1 && (data->edns_version != -1 &&
473 data->edns_lame_known))) {
474 data->edns_version = edns_version;
475 data->edns_lame_known = 1;
/* either insert the entry into the table or just release its lock.
 * NOTE(review): the condition pairing with the else below is not visible,
 * presumably needtoinsert - confirm. */
479 slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
480 else { lock_rw_unlock(&e->lock); }
/**
 * Report the lameness and rtt of a host for a zone and query type.
 * @param infra: the infra cache.
 * @param addr, addrlen: host address.
 * @param name, namelen: zone name.
 * @param qtype: query type; decides which lameness flag and which timeout
 *	counter apply.
 * @param lame, dnsseclame, reclame: output flags.
 * @param rtt: set to rtt_unclamped() of the stored rtt, then overridden
 *	with USEFUL_SERVER_TOP_TIMEOUT or USEFUL_SERVER_TOP_TIMEOUT-1000 in
 *	the probe-delay and expired-entry cases below.
 * @param timenow: current time.
 * Every exit path visible here releases the entry lock first.
 * NOTE(review): the assignments to the three flag outputs, the not-found
 * path and all return values are on lines not visible in this listing.
 */
485 infra_get_lame_rtt(struct infra_cache* infra,
486 struct sockaddr_storage* addr, socklen_t addrlen,
487 uint8_t* name, size_t namelen, uint16_t qtype,
488 int* lame, int* dnsseclame, int* reclame, int* rtt, uint32_t timenow)
490 struct infra_data* host;
491 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
495 host = (struct infra_data*)e->data;
496 *rtt = rtt_unclamped(&host->rtt);
/* probe-delay case: rto at or above PROBE_MAXRTO, the probe delay has not
 * passed yet, and rto is at least four times the rtt_notimeout() value */
497 if(host->rtt.rto >= PROBE_MAXRTO && timenow < host->probedelay
498 && rtt_notimeout(&host->rtt)*4 <= host->rtt.rto) {
499 /* single probe for this domain, and we are not probing */
500 /* unless the query type allows a probe to happen */
/* a type whose counter reached TIMEOUT_COUNT_MAX gets the full
 * USEFUL_SERVER_TOP_TIMEOUT, any other gets 1000 less */
501 if(qtype == LDNS_RR_TYPE_A) {
502 if(host->timeout_A >= TIMEOUT_COUNT_MAX)
503 *rtt = USEFUL_SERVER_TOP_TIMEOUT;
504 else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
505 } else if(qtype == LDNS_RR_TYPE_AAAA) {
506 if(host->timeout_AAAA >= TIMEOUT_COUNT_MAX)
507 *rtt = USEFUL_SERVER_TOP_TIMEOUT;
508 else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
510 if(host->timeout_other >= TIMEOUT_COUNT_MAX)
511 *rtt = USEFUL_SERVER_TOP_TIMEOUT;
512 else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
/* expired entry */
515 if(timenow > host->ttl) {
517 /* see if this can be a re-probe of an unresponsive server */
518 /* minus 1000 because that is outside of the RTTBAND, so
519 * blacklisted servers stay blacklisted if this is chosen */
520 if(host->rtt.rto >= USEFUL_SERVER_TOP_TIMEOUT) {
521 lock_rw_unlock(&e->lock);
522 *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
528 lock_rw_unlock(&e->lock);
531 /* check lameness first */
/* order of the checks: type-specific lameness, then dnssec lameness, then
 * recursion lameness */
532 if(host->lame_type_A && qtype == LDNS_RR_TYPE_A) {
533 lock_rw_unlock(&e->lock);
538 } else if(host->lame_other && qtype != LDNS_RR_TYPE_A) {
539 lock_rw_unlock(&e->lock);
544 } else if(host->isdnsseclame) {
545 lock_rw_unlock(&e->lock);
550 } else if(host->rec_lame) {
551 lock_rw_unlock(&e->lock);
557 /* no lameness for this type of query */
558 lock_rw_unlock(&e->lock);
/**
 * Memory in use by the infra cache.
 * @param infra: the infra cache.
 * @return sizeof(*infra) plus what slabhash_get_mem() reports for the host
 *	table.
 */
566 infra_get_mem(struct infra_cache* infra)
568 return sizeof(*infra) + slabhash_get_mem(infra->hosts);