2 * Copyright (C) 2013, 2014 Internet Systems Consortium, Inc. ("ISC")
4 * Permission to use, copy, modify, and/or distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
9 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
10 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
11 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
12 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
13 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
14 * PERFORMANCE OF THIS SOFTWARE.
20 * Rate limit DNS responses.
23 /* #define ISC_LIST_CHECKINIT */
28 #include <isc/netaddr.h>
29 #include <isc/print.h>
31 #include <dns/result.h>
32 #include <dns/rcode.h>
33 #include <dns/rdatatype.h>
34 #include <dns/rdataclass.h>
40 log_end(dns_rrl_t *rrl, dns_rrl_entry_t *e, isc_boolean_t early,
41 char *log_buf, unsigned int log_buf_len);
44 * Get a modulus for a hash function that is tolerably likely to be
45 * relatively prime to most inputs. Of course, we get a prime for for initial
46 * values not larger than the square of the last prime. We often get a prime
48 * This works well in practice for hash tables up to at least 100
49 * times the square of the last prime and better than a multiplicative hash.
52 hash_divisor(unsigned int initial) {
53 static isc_uint16_t primes[] = {
54 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41,
55 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97,
57 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157,
58 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227,
59 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283,
60 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367,
61 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439,
62 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509,
63 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599,
64 601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661,
65 673, 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751,
66 757, 761, 769, 773, 787, 797, 809, 811, 821, 823, 827, 829,
67 839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919,
68 929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997,1009,
77 if (primes[sizeof(primes)/sizeof(primes[0])-1] >= result) {
84 if ((result & 1) == 0)
93 if ((result % p) == 0) {
98 } while (pp < &primes[sizeof(primes) / sizeof(primes[0])]);
100 if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3))
101 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
102 DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG3,
103 "%d hash_divisor() divisions in %d tries"
104 " to get %d from %d",
105 divisions, tries, result, initial);
111 * Convert a timestamp to a number of seconds in the past.
114 delta_rrl_time(isc_stdtime_t ts, isc_stdtime_t now) {
122 * The timestamp is in the future. That future might result from
123 * re-ordered requests, because we use timestamps on requests
124 * instead of consulting a clock. Timestamps in the distant future are
125 * assumed to result from clock changes. When the clock changes to
126 * the past, make existing timestamps appear to be in the past.
128 if (delta < -DNS_RRL_MAX_TIME_TRAVEL)
129 return (DNS_RRL_FOREVER);
134 get_age(const dns_rrl_t *rrl, const dns_rrl_entry_t *e, isc_stdtime_t now) {
136 return (DNS_RRL_FOREVER);
137 return (delta_rrl_time(e->ts + rrl->ts_bases[e->ts_gen], now));
141 set_age(dns_rrl_t *rrl, dns_rrl_entry_t *e, isc_stdtime_t now) {
142 dns_rrl_entry_t *e_old;
146 ts_gen = rrl->ts_gen;
147 ts = now - rrl->ts_bases[ts_gen];
149 if (ts < -DNS_RRL_MAX_TIME_TRAVEL)
150 ts = DNS_RRL_FOREVER;
156 * Make a new timestamp base if the current base is too old.
157 * All entries older than DNS_RRL_MAX_WINDOW seconds are ancient,
158 * useless history. Their timestamps can be treated as if they are
160 * We only do arithmetic on more recent timestamps, so bases for
161 * older timestamps can be recycled provided the old timestamps are
162 * marked as ancient history.
163 * This loop is almost always very short because most entries are
164 * recycled after one second and any entries that need to be marked
165 * are older than (DNS_RRL_TS_BASES)*DNS_RRL_MAX_TS seconds.
167 if (ts >= DNS_RRL_MAX_TS) {
168 ts_gen = (ts_gen + 1) % DNS_RRL_TS_BASES;
169 for (e_old = ISC_LIST_TAIL(rrl->lru), i = 0;
170 e_old != NULL && (e_old->ts_gen == ts_gen ||
171 !ISC_LINK_LINKED(e_old, hlink));
172 e_old = ISC_LIST_PREV(e_old, lru), ++i)
174 e_old->ts_valid = ISC_FALSE;
177 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
178 DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG1,
179 "rrl new time base scanned %d entries"
180 " at %d for %d %d %d %d",
181 i, now, rrl->ts_bases[ts_gen],
182 rrl->ts_bases[(ts_gen + 1) %
184 rrl->ts_bases[(ts_gen + 2) %
186 rrl->ts_bases[(ts_gen + 3) %
188 rrl->ts_gen = ts_gen;
189 rrl->ts_bases[ts_gen] = now;
195 e->ts_valid = ISC_TRUE;
199 expand_entries(dns_rrl_t *rrl, int new) {
206 if (rrl->num_entries + new >= rrl->max_entries &&
207 rrl->max_entries != 0)
209 new = rrl->max_entries - rrl->num_entries;
211 return (ISC_R_SUCCESS);
215 * Log expansions so that the user can tune max-table-size
216 * and min-table-size.
218 if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP) &&
221 if (rrl->searches != 0)
222 rate /= rrl->searches;
223 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
224 DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
225 "increase from %d to %d RRL entries with"
226 " %d bins; average search length %.1f",
227 rrl->num_entries, rrl->num_entries+new,
228 rrl->hash->length, rate);
231 bsize = sizeof(dns_rrl_block_t) + (new-1)*sizeof(dns_rrl_entry_t);
232 b = isc_mem_get(rrl->mctx, bsize);
234 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
235 DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_FAIL,
236 "isc_mem_get(%d) failed for RRL entries",
238 return (ISC_R_NOMEMORY);
244 for (i = 0; i < new; ++i, ++e) {
245 ISC_LINK_INIT(e, hlink);
246 ISC_LIST_INITANDAPPEND(rrl->lru, e, lru);
248 rrl->num_entries += new;
249 ISC_LIST_INITANDAPPEND(rrl->blocks, b, link);
251 return (ISC_R_SUCCESS);
254 static inline dns_rrl_bin_t *
255 get_bin(dns_rrl_hash_t *hash, unsigned int hval) {
256 return (&hash->bins[hval % hash->length]);
260 free_old_hash(dns_rrl_t *rrl) {
261 dns_rrl_hash_t *old_hash;
262 dns_rrl_bin_t *old_bin;
263 dns_rrl_entry_t *e, *e_next;
265 old_hash = rrl->old_hash;
266 for (old_bin = &old_hash->bins[0];
267 old_bin < &old_hash->bins[old_hash->length];
270 for (e = ISC_LIST_HEAD(*old_bin); e != NULL; e = e_next) {
271 e_next = ISC_LIST_NEXT(e, hlink);
272 ISC_LINK_INIT(e, hlink);
276 isc_mem_put(rrl->mctx, old_hash,
278 + (old_hash->length - 1) * sizeof(old_hash->bins[0]));
279 rrl->old_hash = NULL;
283 expand_rrl_hash(dns_rrl_t *rrl, isc_stdtime_t now) {
284 dns_rrl_hash_t *hash;
285 int old_bins, new_bins, hsize;
288 if (rrl->old_hash != NULL)
292 * Most searches fail and so go to the end of the chain.
293 * Use a small hash table load factor.
295 old_bins = (rrl->hash == NULL) ? 0 : rrl->hash->length;
296 new_bins = old_bins/8 + old_bins;
297 if (new_bins < rrl->num_entries)
298 new_bins = rrl->num_entries;
299 new_bins = hash_divisor(new_bins);
301 hsize = sizeof(dns_rrl_hash_t) + (new_bins-1)*sizeof(hash->bins[0]);
302 hash = isc_mem_get(rrl->mctx, hsize);
304 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
305 DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_FAIL,
306 "isc_mem_get(%d) failed for"
309 return (ISC_R_NOMEMORY);
311 memset(hash, 0, hsize);
312 hash->length = new_bins;
314 hash->gen = rrl->hash_gen;
316 if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP) && old_bins != 0) {
318 if (rrl->searches != 0)
319 rate /= rrl->searches;
320 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
321 DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
322 "increase from %d to %d RRL bins for"
323 " %d entries; average search length %.1f",
324 old_bins, new_bins, rrl->num_entries, rate);
327 rrl->old_hash = rrl->hash;
328 if (rrl->old_hash != NULL)
329 rrl->old_hash->check_time = now;
332 return (ISC_R_SUCCESS);
336 ref_entry(dns_rrl_t *rrl, dns_rrl_entry_t *e, int probes, isc_stdtime_t now) {
338 * Make the entry most recently used.
340 if (ISC_LIST_HEAD(rrl->lru) != e) {
341 if (e == rrl->last_logged)
342 rrl->last_logged = ISC_LIST_PREV(e, lru);
343 ISC_LIST_UNLINK(rrl->lru, e, lru);
344 ISC_LIST_PREPEND(rrl->lru, e, lru);
348 * Expand the hash table if it is time and necessary.
349 * This will leave the newly referenced entry in a chain in the
350 * old hash table. It will migrate to the new hash table the next
351 * time it is used or be cut loose when the old hash table is destroyed.
353 rrl->probes += probes;
355 if (rrl->searches > 100 &&
356 delta_rrl_time(rrl->hash->check_time, now) > 1) {
357 if (rrl->probes/rrl->searches > 2)
358 expand_rrl_hash(rrl, now);
359 rrl->hash->check_time = now;
365 static inline isc_boolean_t
366 key_cmp(const dns_rrl_key_t *a, const dns_rrl_key_t *b) {
367 if (memcmp(a, b, sizeof(dns_rrl_key_t)) == 0)
372 static inline isc_uint32_t
373 hash_key(const dns_rrl_key_t *key) {
378 for (i = sizeof(*key) / sizeof(key->w[0]) - 1; i >= 0; --i) {
379 hval = key->w[i] + (hval<<1);
385 * Construct the hash table key.
386 * Use a hash of the DNS query name to save space in the database.
387 * Collisions result in legitimate rate limiting responses for one
388 * query name also limiting responses for other names to the
389 * same client. This is rare and benign enough given the large
390 * space costs compared to keeping the entire name in the database
391 * entry or the time costs of dynamic allocation.
394 make_key(const dns_rrl_t *rrl, dns_rrl_key_t *key,
395 const isc_sockaddr_t *client_addr,
396 dns_rdatatype_t qtype, dns_name_t *qname, dns_rdataclass_t qclass,
397 dns_rrl_rtype_t rtype)
400 dns_offsets_t base_offsets;
403 memset(key, 0, sizeof(*key));
405 key->s.rtype = rtype;
406 if (rtype == DNS_RRL_RTYPE_QUERY) {
407 key->s.qtype = qtype;
408 key->s.qclass = qclass & 0xff;
409 } else if (rtype == DNS_RRL_RTYPE_REFERRAL ||
410 rtype == DNS_RRL_RTYPE_NODATA) {
412 * Because there is no qtype in the empty answer sections of
413 * referral and NODATA responses, count them as the same.
415 key->s.qclass = qclass & 0xff;
418 if (qname != NULL && qname->labels != 0) {
420 * Ignore the first label of wildcards.
422 if ((qname->attributes & DNS_NAMEATTR_WILDCARD) != 0 &&
423 (labels = dns_name_countlabels(qname)) > 1)
425 dns_name_init(&base, base_offsets);
426 dns_name_getlabelsequence(qname, 1, labels-1, &base);
427 key->s.qname_hash = dns_name_hashbylabel(&base,
430 key->s.qname_hash = dns_name_hashbylabel(qname,
435 switch (client_addr->type.sa.sa_family) {
437 key->s.ip[0] = (client_addr->type.sin.sin_addr.s_addr &
441 key->s.ipv6 = ISC_TRUE;
442 memmove(key->s.ip, &client_addr->type.sin6.sin6_addr,
444 for (i = 0; i < DNS_RRL_MAX_PREFIX/32; ++i)
445 key->s.ip[i] &= rrl->ipv6_mask[i];
450 static inline dns_rrl_rate_t *
451 get_rate(dns_rrl_t *rrl, dns_rrl_rtype_t rtype) {
453 case DNS_RRL_RTYPE_QUERY:
454 return (&rrl->responses_per_second);
455 case DNS_RRL_RTYPE_REFERRAL:
456 return (&rrl->referrals_per_second);
457 case DNS_RRL_RTYPE_NODATA:
458 return (&rrl->nodata_per_second);
459 case DNS_RRL_RTYPE_NXDOMAIN:
460 return (&rrl->nxdomains_per_second);
461 case DNS_RRL_RTYPE_ERROR:
462 return (&rrl->errors_per_second);
463 case DNS_RRL_RTYPE_ALL:
464 return (&rrl->all_per_second);
472 response_balance(dns_rrl_t *rrl, const dns_rrl_entry_t *e, int age) {
473 dns_rrl_rate_t *ratep;
476 if (e->key.s.rtype == DNS_RRL_RTYPE_TCP) {
479 ratep = get_rate(rrl, e->key.s.rtype);
480 rate = ratep->scaled;
483 balance = e->responses + age * rate;
490 * Search for an entry for a response and optionally create it.
492 static dns_rrl_entry_t *
493 get_entry(dns_rrl_t *rrl, const isc_sockaddr_t *client_addr,
494 dns_rdataclass_t qclass, dns_rdatatype_t qtype, dns_name_t *qname,
495 dns_rrl_rtype_t rtype, isc_stdtime_t now, isc_boolean_t create,
496 char *log_buf, unsigned int log_buf_len)
501 dns_rrl_hash_t *hash;
502 dns_rrl_bin_t *new_bin, *old_bin;
505 make_key(rrl, &key, client_addr, qtype, qname, qclass, rtype);
506 hval = hash_key(&key);
509 * Look for the entry in the current hash table.
511 new_bin = get_bin(rrl->hash, hval);
513 e = ISC_LIST_HEAD(*new_bin);
515 if (key_cmp(&e->key, &key)) {
516 ref_entry(rrl, e, probes, now);
520 e = ISC_LIST_NEXT(e, hlink);
524 * Look in the old hash table.
526 if (rrl->old_hash != NULL) {
527 old_bin = get_bin(rrl->old_hash, hval);
528 e = ISC_LIST_HEAD(*old_bin);
530 if (key_cmp(&e->key, &key)) {
531 ISC_LIST_UNLINK(*old_bin, e, hlink);
532 ISC_LIST_PREPEND(*new_bin, e, hlink);
533 e->hash_gen = rrl->hash_gen;
534 ref_entry(rrl, e, probes, now);
537 e = ISC_LIST_NEXT(e, hlink);
541 * Discard prevous hash table when all of its entries are old.
543 age = delta_rrl_time(rrl->old_hash->check_time, now);
544 if (age > rrl->window)
552 * The entry does not exist, so create it by finding a free entry.
553 * Keep currently penalized and logged entries.
554 * Try to make more entries if none are idle.
555 * Steal the oldest entry if we cannot create more.
557 for (e = ISC_LIST_TAIL(rrl->lru);
559 e = ISC_LIST_PREV(e, lru))
561 if (!ISC_LINK_LINKED(e, hlink))
563 age = get_age(rrl, e, now);
568 if (!e->logged && response_balance(rrl, e, age) > 0)
572 expand_entries(rrl, ISC_MIN((rrl->num_entries+1)/2, 1000));
573 e = ISC_LIST_TAIL(rrl->lru);
576 log_end(rrl, e, ISC_TRUE, log_buf, log_buf_len);
577 if (ISC_LINK_LINKED(e, hlink)) {
578 if (e->hash_gen == rrl->hash_gen)
581 hash = rrl->old_hash;
582 old_bin = get_bin(hash, hash_key(&e->key));
583 ISC_LIST_UNLINK(*old_bin, e, hlink);
585 ISC_LIST_PREPEND(*new_bin, e, hlink);
586 e->hash_gen = rrl->hash_gen;
588 e->ts_valid = ISC_FALSE;
589 ref_entry(rrl, e, probes, now);
594 debit_log(const dns_rrl_entry_t *e, int age, const char *action) {
595 char buf[sizeof("age=12345678")];
598 if (age == DNS_RRL_FOREVER) {
601 snprintf(buf, sizeof(buf), "age=%d", age);
604 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
605 DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG3,
606 "rrl %08x %6s responses=%-3d %s",
607 hash_key(&e->key), age_str, e->responses, action);
610 static inline dns_rrl_result_t
611 debit_rrl_entry(dns_rrl_t *rrl, dns_rrl_entry_t *e, double qps, double scale,
612 const isc_sockaddr_t *client_addr, isc_stdtime_t now,
613 char *log_buf, unsigned int log_buf_len)
615 int rate, new_rate, slip, new_slip, age, log_secs, min;
616 dns_rrl_rate_t *ratep;
617 dns_rrl_entry_t const *credit_e;
620 * Pick the rate counter.
621 * Optionally adjust the rate by the estimated query/second rate.
623 ratep = get_rate(rrl, e->key.s.rtype);
626 return (DNS_RRL_RESULT_OK);
630 * The limit for clients that have used TCP is not scaled.
632 credit_e = get_entry(rrl, client_addr,
633 0, dns_rdatatype_none, NULL,
634 DNS_RRL_RTYPE_TCP, now, ISC_FALSE,
635 log_buf, log_buf_len);
636 if (credit_e != NULL) {
637 age = get_age(rrl, e, now);
638 if (age < rrl->window)
643 new_rate = (int) (rate * scale);
646 if (ratep->scaled != new_rate) {
647 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
648 DNS_LOGMODULE_REQUEST,
650 "%d qps scaled %s by %.2f"
652 (int)qps, ratep->str, scale,
655 ratep->scaled = rate;
659 min = -rrl->window * rate;
662 * Treat time jumps into the recent past as no time.
663 * Treat entries older than the window as if they were just created
664 * Credit other entries.
666 age = get_age(rrl, e, now);
669 * Credit tokens earned during elapsed time.
671 if (age > rrl->window) {
675 e->responses += rate*age;
676 if (e->responses > rate) {
682 * Find the seconds since last log message without overflowing
683 * small counter. This counter is reset when an entry is
684 * created. It is not necessarily reset when some requests
685 * are answered provided other requests continue to be dropped
686 * or slipped. This can happen when the request rate is just
690 log_secs = e->log_secs;
692 if (log_secs > DNS_RRL_MAX_LOG_SECS || log_secs < 0)
693 log_secs = DNS_RRL_MAX_LOG_SECS;
694 e->log_secs = log_secs;
697 set_age(rrl, e, now);
700 * Debit the entry for this response.
702 if (--e->responses >= 0) {
703 if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3))
704 debit_log(e, age, "");
705 return (DNS_RRL_RESULT_OK);
708 if (e->responses < min)
712 * Drop this response unless it should slip or leak.
715 if (slip > 2 && scale < 1.0) {
716 new_slip = (int) (slip * scale);
719 if (rrl->slip.scaled != new_slip) {
720 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
721 DNS_LOGMODULE_REQUEST,
724 " by %.2f from %d to %d",
728 rrl->slip.scaled = slip;
731 if (slip != 0 && e->key.s.rtype != DNS_RRL_RTYPE_ALL) {
732 if (e->slip_cnt++ == 0) {
733 if ((int) e->slip_cnt >= slip)
735 if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3))
736 debit_log(e, age, "slip");
737 return (DNS_RRL_RESULT_SLIP);
738 } else if ((int) e->slip_cnt >= slip) {
743 if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3))
744 debit_log(e, age, "drop");
745 return (DNS_RRL_RESULT_DROP);
748 static inline dns_rrl_qname_buf_t *
749 get_qname(dns_rrl_t *rrl, const dns_rrl_entry_t *e) {
750 dns_rrl_qname_buf_t *qbuf;
752 qbuf = rrl->qnames[e->log_qname];
753 if (qbuf == NULL || qbuf->e != e)
759 free_qname(dns_rrl_t *rrl, dns_rrl_entry_t *e) {
760 dns_rrl_qname_buf_t *qbuf;
762 qbuf = get_qname(rrl, e);
765 ISC_LIST_APPEND(rrl->qname_free, qbuf, link);
770 add_log_str(isc_buffer_t *lb, const char *str, unsigned int str_len) {
773 isc_buffer_availableregion(lb, ®ion);
774 if (str_len >= region.length) {
775 if (region.length <= 0)
777 str_len = region.length;
779 memmove(region.base, str, str_len);
780 isc_buffer_add(lb, str_len);
783 #define ADD_LOG_CSTR(eb, s) add_log_str(eb, s, sizeof(s)-1)
786 * Build strings for the logs
789 make_log_buf(dns_rrl_t *rrl, dns_rrl_entry_t *e,
790 const char *str1, const char *str2, isc_boolean_t plural,
791 dns_name_t *qname, isc_boolean_t save_qname,
792 dns_rrl_result_t rrl_result, isc_result_t resp_result,
793 char *log_buf, unsigned int log_buf_len)
796 dns_rrl_qname_buf_t *qbuf;
798 char strbuf[ISC_MAX(sizeof("/123"), sizeof(" (12345678)"))];
800 isc_result_t msg_result;
802 if (log_buf_len <= 1) {
803 if (log_buf_len == 1)
807 isc_buffer_init(&lb, log_buf, log_buf_len-1);
810 add_log_str(&lb, str1, strlen(str1));
812 add_log_str(&lb, str2, strlen(str2));
814 switch (rrl_result) {
815 case DNS_RRL_RESULT_OK:
817 case DNS_RRL_RESULT_DROP:
818 ADD_LOG_CSTR(&lb, "drop ");
820 case DNS_RRL_RESULT_SLIP:
821 ADD_LOG_CSTR(&lb, "slip ");
828 switch (e->key.s.rtype) {
829 case DNS_RRL_RTYPE_QUERY:
831 case DNS_RRL_RTYPE_REFERRAL:
832 ADD_LOG_CSTR(&lb, "referral ");
834 case DNS_RRL_RTYPE_NODATA:
835 ADD_LOG_CSTR(&lb, "NODATA ");
837 case DNS_RRL_RTYPE_NXDOMAIN:
838 ADD_LOG_CSTR(&lb, "NXDOMAIN ");
840 case DNS_RRL_RTYPE_ERROR:
841 if (resp_result == ISC_R_SUCCESS) {
842 ADD_LOG_CSTR(&lb, "error ");
844 rstr = isc_result_totext(resp_result);
845 add_log_str(&lb, rstr, strlen(rstr));
846 ADD_LOG_CSTR(&lb, " error ");
849 case DNS_RRL_RTYPE_ALL:
850 ADD_LOG_CSTR(&lb, "all ");
857 ADD_LOG_CSTR(&lb, "responses to ");
859 ADD_LOG_CSTR(&lb, "response to ");
861 memset(&cidr, 0, sizeof(cidr));
863 snprintf(strbuf, sizeof(strbuf), "/%d", rrl->ipv6_prefixlen);
864 cidr.family = AF_INET6;
865 memset(&cidr.type.in6, 0, sizeof(cidr.type.in6));
866 memmove(&cidr.type.in6, e->key.s.ip, sizeof(e->key.s.ip));
868 snprintf(strbuf, sizeof(strbuf), "/%d", rrl->ipv4_prefixlen);
869 cidr.family = AF_INET;
870 cidr.type.in.s_addr = e->key.s.ip[0];
872 msg_result = isc_netaddr_totext(&cidr, &lb);
873 if (msg_result != ISC_R_SUCCESS)
874 ADD_LOG_CSTR(&lb, "?");
875 add_log_str(&lb, strbuf, strlen(strbuf));
877 if (e->key.s.rtype == DNS_RRL_RTYPE_QUERY ||
878 e->key.s.rtype == DNS_RRL_RTYPE_REFERRAL ||
879 e->key.s.rtype == DNS_RRL_RTYPE_NODATA ||
880 e->key.s.rtype == DNS_RRL_RTYPE_NXDOMAIN) {
881 qbuf = get_qname(rrl, e);
882 if (save_qname && qbuf == NULL &&
883 qname != NULL && dns_name_isabsolute(qname)) {
885 * Capture the qname for the "stop limiting" message.
887 qbuf = ISC_LIST_TAIL(rrl->qname_free);
889 ISC_LIST_UNLINK(rrl->qname_free, qbuf, link);
890 } else if (rrl->num_qnames < DNS_RRL_QNAMES) {
891 qbuf = isc_mem_get(rrl->mctx, sizeof(*qbuf));
893 memset(qbuf, 0, sizeof(*qbuf));
894 ISC_LINK_INIT(qbuf, link);
895 qbuf->index = rrl->num_qnames;
896 rrl->qnames[rrl->num_qnames++] = qbuf;
898 isc_log_write(dns_lctx,
900 DNS_LOGMODULE_REQUEST,
903 " failed for RRL qname",
908 e->log_qname = qbuf->index;
910 dns_fixedname_init(&qbuf->qname);
912 dns_fixedname_name(&qbuf->qname),
917 qname = dns_fixedname_name(&qbuf->qname);
919 ADD_LOG_CSTR(&lb, " for ");
920 (void)dns_name_totext(qname, ISC_TRUE, &lb);
922 ADD_LOG_CSTR(&lb, " for (?)");
924 if (e->key.s.rtype != DNS_RRL_RTYPE_NXDOMAIN) {
925 ADD_LOG_CSTR(&lb, " ");
926 (void)dns_rdataclass_totext(e->key.s.qclass, &lb);
927 if (e->key.s.rtype == DNS_RRL_RTYPE_QUERY) {
928 ADD_LOG_CSTR(&lb, " ");
929 (void)dns_rdatatype_totext(e->key.s.qtype, &lb);
932 snprintf(strbuf, sizeof(strbuf), " (%08x)",
933 e->key.s.qname_hash);
934 add_log_str(&lb, strbuf, strlen(strbuf));
938 * We saved room for '\0'.
940 log_buf[isc_buffer_usedlength(&lb)] = '\0';
944 log_end(dns_rrl_t *rrl, dns_rrl_entry_t *e, isc_boolean_t early,
945 char *log_buf, unsigned int log_buf_len)
950 rrl->log_only ? "would stop limiting "
952 ISC_TRUE, NULL, ISC_FALSE,
953 DNS_RRL_RESULT_OK, ISC_R_SUCCESS,
954 log_buf, log_buf_len);
955 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
956 DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
959 e->logged = ISC_FALSE;
965 * Log messages for streams that have stopped being rate limited.
968 log_stops(dns_rrl_t *rrl, isc_stdtime_t now, int limit,
969 char *log_buf, unsigned int log_buf_len)
974 for (e = rrl->last_logged; e != NULL; e = ISC_LIST_PREV(e, lru)) {
978 age = get_age(rrl, e, now);
979 if (age < DNS_RRL_STOP_LOG_SECS ||
980 response_balance(rrl, e, age) < 0)
984 log_end(rrl, e, now == 0, log_buf, log_buf_len);
985 if (rrl->num_logged <= 0)
989 * Too many messages could stall real work.
992 rrl->last_logged = ISC_LIST_PREV(e, lru);
997 INSIST(rrl->num_logged == 0);
998 rrl->log_stops_time = now;
1000 rrl->last_logged = e;
1004 * Main rate limit interface.
1007 dns_rrl(dns_view_t *view,
1008 const isc_sockaddr_t *client_addr, isc_boolean_t is_tcp,
1009 dns_rdataclass_t qclass, dns_rdatatype_t qtype,
1010 dns_name_t *qname, isc_result_t resp_result, isc_stdtime_t now,
1011 isc_boolean_t wouldlog, char *log_buf, unsigned int log_buf_len)
1014 dns_rrl_rtype_t rtype;
1016 isc_netaddr_t netclient;
1020 isc_result_t result;
1021 dns_rrl_result_t rrl_result;
1023 INSIST(log_buf != NULL && log_buf_len > 0);
1026 if (rrl->exempt != NULL) {
1027 isc_netaddr_fromsockaddr(&netclient, client_addr);
1028 result = dns_acl_match(&netclient, NULL, rrl->exempt,
1029 &view->aclenv, &exempt_match, NULL);
1030 if (result == ISC_R_SUCCESS && exempt_match > 0)
1031 return (DNS_RRL_RESULT_OK);
1037 * Estimate total query per second rate when scaling by qps.
1039 if (rrl->qps_scale == 0) {
1043 ++rrl->qps_responses;
1044 secs = delta_rrl_time(rrl->qps_time, now);
1048 qps = (1.0*rrl->qps_responses) / secs;
1049 if (secs >= rrl->window) {
1050 if (isc_log_wouldlog(dns_lctx,
1051 DNS_RRL_LOG_DEBUG3))
1052 isc_log_write(dns_lctx,
1053 DNS_LOGCATEGORY_RRL,
1054 DNS_LOGMODULE_REQUEST,
1056 "%d responses/%d seconds"
1058 rrl->qps_responses, secs,
1061 rrl->qps_responses = 0;
1062 rrl->qps_time = now;
1063 } else if (qps < rrl->qps) {
1067 scale = rrl->qps_scale / qps;
1071 * Do maintenance once per second.
1073 if (rrl->num_logged > 0 && rrl->log_stops_time != now)
1074 log_stops(rrl, now, 8, log_buf, log_buf_len);
1077 * Notice TCP responses when scaling limits by qps.
1078 * Do not try to rate limit TCP responses.
1082 e = get_entry(rrl, client_addr,
1083 0, dns_rdatatype_none, NULL,
1084 DNS_RRL_RTYPE_TCP, now, ISC_TRUE,
1085 log_buf, log_buf_len);
1087 e->responses = -(rrl->window+1);
1088 set_age(rrl, e, now);
1092 return (ISC_R_SUCCESS);
1096 * Find the right kind of entry, creating it if necessary.
1097 * If that is impossible, then nothing more can be done
1099 switch (resp_result) {
1101 rtype = DNS_RRL_RTYPE_QUERY;
1103 case DNS_R_DELEGATION:
1104 rtype = DNS_RRL_RTYPE_REFERRAL;
1107 rtype = DNS_RRL_RTYPE_NODATA;
1109 case DNS_R_NXDOMAIN:
1110 rtype = DNS_RRL_RTYPE_NXDOMAIN;
1113 rtype = DNS_RRL_RTYPE_ERROR;
1116 e = get_entry(rrl, client_addr, qclass, qtype, qname, rtype,
1117 now, ISC_TRUE, log_buf, log_buf_len);
1120 return (DNS_RRL_RESULT_OK);
1123 if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG1)) {
1125 * Do not worry about speed or releasing the lock.
1126 * This message appears before messages from debit_rrl_entry().
1128 make_log_buf(rrl, e, "consider limiting ", NULL, ISC_FALSE,
1129 qname, ISC_FALSE, DNS_RRL_RESULT_OK, resp_result,
1130 log_buf, log_buf_len);
1131 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
1132 DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG1,
1136 rrl_result = debit_rrl_entry(rrl, e, qps, scale, client_addr, now,
1137 log_buf, log_buf_len);
1139 if (rrl->all_per_second.r != 0) {
1141 * We must debit the all-per-second token bucket if we have
1142 * an all-per-second limit for the IP address.
1143 * The all-per-second limit determines the log message
1144 * when both limits are hit.
1145 * The response limiting must continue if the
1146 * all-per-second limiting lapses.
1148 dns_rrl_entry_t *e_all;
1149 dns_rrl_result_t rrl_all_result;
1151 e_all = get_entry(rrl, client_addr,
1152 0, dns_rdatatype_none, NULL,
1153 DNS_RRL_RTYPE_ALL, now, ISC_TRUE,
1154 log_buf, log_buf_len);
1155 if (e_all == NULL) {
1157 return (DNS_RRL_RESULT_OK);
1159 rrl_all_result = debit_rrl_entry(rrl, e_all, qps, scale,
1161 log_buf, log_buf_len);
1162 if (rrl_all_result != DNS_RRL_RESULT_OK) {
1166 rrl_result = rrl_all_result;
1167 if (rrl_result == DNS_RRL_RESULT_OK)
1168 level = DNS_RRL_LOG_DEBUG2;
1170 level = DNS_RRL_LOG_DEBUG1;
1171 if (isc_log_wouldlog(dns_lctx, level)) {
1172 make_log_buf(rrl, e,
1173 "prefer all-per-second limiting ",
1174 NULL, ISC_TRUE, qname, ISC_FALSE,
1175 DNS_RRL_RESULT_OK, resp_result,
1176 log_buf, log_buf_len);
1177 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
1178 DNS_LOGMODULE_REQUEST, level,
1184 if (rrl_result == DNS_RRL_RESULT_OK) {
1186 return (DNS_RRL_RESULT_OK);
1190 * Log occassionally in the rate-limit category.
1192 if ((!e->logged || e->log_secs >= DNS_RRL_MAX_LOG_SECS) &&
1193 isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP)) {
1194 make_log_buf(rrl, e, rrl->log_only ? "would " : NULL,
1195 e->logged ? "continue limiting " : "limit ",
1196 ISC_TRUE, qname, ISC_TRUE,
1197 DNS_RRL_RESULT_OK, resp_result,
1198 log_buf, log_buf_len);
1200 e->logged = ISC_TRUE;
1201 if (++rrl->num_logged <= 1)
1202 rrl->last_logged = e;
1207 * Avoid holding the lock.
1213 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
1214 DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
1219 * Make a log message for the caller.
1222 make_log_buf(rrl, e,
1223 rrl->log_only ? "would rate limit " : "rate limit ",
1224 NULL, ISC_FALSE, qname, ISC_FALSE,
1225 rrl_result, resp_result, log_buf, log_buf_len);
1229 * Do not save the qname unless we might need it for
1230 * the ending log message.
1237 return (rrl_result);
1241 dns_rrl_view_destroy(dns_view_t *view) {
1245 char log_buf[DNS_RRL_LOG_BUF_LEN];
1254 * Assume the caller takes care of locking the view and anything else.
1257 if (rrl->num_logged > 0)
1258 log_stops(rrl, 0, ISC_INT32_MAX, log_buf, sizeof(log_buf));
1260 for (i = 0; i < DNS_RRL_QNAMES; ++i) {
1261 if (rrl->qnames[i] == NULL)
1263 isc_mem_put(rrl->mctx, rrl->qnames[i], sizeof(*rrl->qnames[i]));
1266 if (rrl->exempt != NULL)
1267 dns_acl_detach(&rrl->exempt);
1269 DESTROYLOCK(&rrl->lock);
1271 while (!ISC_LIST_EMPTY(rrl->blocks)) {
1272 b = ISC_LIST_HEAD(rrl->blocks);
1273 ISC_LIST_UNLINK(rrl->blocks, b, link);
1274 isc_mem_put(rrl->mctx, b, b->size);
1279 isc_mem_put(rrl->mctx, h,
1280 sizeof(*h) + (h->length - 1) * sizeof(h->bins[0]));
1284 isc_mem_put(rrl->mctx, h,
1285 sizeof(*h) + (h->length - 1) * sizeof(h->bins[0]));
1287 isc_mem_putanddetach(&rrl->mctx, rrl, sizeof(*rrl));
1291 dns_rrl_init(dns_rrl_t **rrlp, dns_view_t *view, int min_entries) {
1293 isc_result_t result;
1297 rrl = isc_mem_get(view->mctx, sizeof(*rrl));
1299 return (ISC_R_NOMEMORY);
1300 memset(rrl, 0, sizeof(*rrl));
1301 isc_mem_attach(view->mctx, &rrl->mctx);
1302 result = isc_mutex_init(&rrl->lock);
1303 if (result != ISC_R_SUCCESS) {
1304 isc_mem_putanddetach(&rrl->mctx, rrl, sizeof(*rrl));
1307 isc_stdtime_get(&rrl->ts_bases[0]);
1311 result = expand_entries(rrl, min_entries);
1312 if (result != ISC_R_SUCCESS) {
1313 dns_rrl_view_destroy(view);
1316 result = expand_rrl_hash(rrl, 0);
1317 if (result != ISC_R_SUCCESS) {
1318 dns_rrl_view_destroy(view);
1323 return (ISC_R_SUCCESS);