3 # plugin for munin to monitor usage of unbound servers.
4 # To install copy this to /usr/local/share/munin/plugins/unbound_munin_
5 # and use munin-node-configure (--suggest, --shell).
7 # (C) 2008 W.C.A. Wijngaards. BSD Licensed.
9 # To install, also enable statistics and unbound-control in unbound.conf:
10 # server: extended-statistics: yes
11 # statistics-cumulative: no
12 # statistics-interval: 0
13 # remote-control: control-enable: yes
14 # Run the command unbound-control-setup to generate the key files.
16 # Environment variables for this script
17 # statefile - where to put temporary statefile.
18 # unbound_conf - where the unbound.conf file is located.
19 # unbound_control - where to find unbound-control executable.
20 # spoof_warn - warning threshold for unwanted replies (possible spoofing)
21 # spoof_crit - critical threshold for unwanted replies (possible spoofing)
23 # You can set them in your munin/plugin-conf.d/plugins.conf file
27 # env.statefile /usr/local/var/munin/plugin-state/unbound-state
28 # env.unbound_conf /usr/local/etc/unbound/unbound.conf
29 # env.unbound_control /usr/local/sbin/unbound-control
31 # env.spoof_crit 100000
33 # This plugin can create different graphs depending on what name
34 # you link it as (with ln -s) into the plugins directory
35 # You can link it multiple times.
36 # If you are only a casual user, the _hits and _by_type are most interesting,
37 # possibly followed by _by_rcode.
39 # unbound_munin_hits - base volume, cache hits, unwanted traffic
40 # unbound_munin_queue - to monitor the internal requestlist
41 # unbound_munin_memory - memory usage
42 # unbound_munin_by_type - incoming queries by type
43 # unbound_munin_by_class - incoming queries by class
44 # unbound_munin_by_opcode - incoming queries by opcode
45 # unbound_munin_by_rcode - answers by rcode, validation status
46 # unbound_munin_by_flags - incoming queries by flags
47 # unbound_munin_histogram - histogram of query resolving times
49 # Magic markers - optional - used by installation scripts and
50 # munin-config: (originally contrib family but munin-node-configure ignores it)
53 #%# capabilities=autoconf suggest
59 unbound_munin_ - Munin plugin to monitor the Unbound DNS resolver.
61 =head1 APPLICABLE SYSTEMS
63 System with unbound daemon.
69 env.statefile /usr/local/var/munin/plugin-state/unbound-state
70 env.unbound_conf /usr/local/etc/unbound/unbound.conf
71 env.unbound_control /usr/local/sbin/unbound-control
75 Use the .env settings to override the defaults.
79 Can be used to present different graphs. Use ln -s for that name in
80 the plugins directory to enable the graph.
81 unbound_munin_hits - base volume, cache hits, unwanted traffic
82 unbound_munin_queue - to monitor the internal requestlist
83 unbound_munin_memory - memory usage
84 unbound_munin_by_type - incoming queries by type
85 unbound_munin_by_class - incoming queries by class
86 unbound_munin_by_opcode - incoming queries by opcode
87 unbound_munin_by_rcode - answers by rcode, validation status
88 unbound_munin_by_flags - incoming queries by flags
89 unbound_munin_histogram - histogram of query resolving times
93 Copyright 2008 W.C.A. Wijngaards
# Runtime settings. Each assignment uses the named variable from the
# environment when it is set and non-empty, otherwise the default after ":-".
# state - file the fetched statistics are written to and read back from
# conf  - unbound.conf path, passed to unbound-control with -c
# ctrl  - unbound-control executable used to fetch the statistics
# warn  - value emitted as u_replies.warning in the hits graph config
# crit  - value emitted as u_replies.critical in the hits graph config
101 state=${statefile:-/usr/local/var/munin/plugin-state/unbound-state}
102 conf=${unbound_conf:-/usr/local/etc/unbound/unbound.conf}
103 ctrl=${unbound_control:-/usr/local/sbin/unbound-control}
104 warn=${spoof_warn:-1000}
105 crit=${spoof_crit:-100000}
108 # number of seconds between polling attempts.
109 # makes the statefile hang around for at least this many seconds,
110 # so that multiple links of this script can share the results.
113 # to keep things within 19 characters
# sed expressions that shorten the words of an unbound statistic name
# (for example total -> t, requestlist -> ql, unwanted -> u). They are
# applied as `echo NAME | sed $ABBREV | tr . _` to build the field name used
# in the output ("$mn.label", "$mn.value"), so unwanted.replies becomes
# u_replies. $ABBREV is expanded unquoted on purpose: it has to split into
# separate -e arguments for sed. No expression has a g flag, so each one
# replaces only the first match in the name.
114 ABBREV="-e s/total/t/ -e s/thread/t/ -e s/num/n/ -e s/query/q/ -e s/answer/a/ -e s/unwanted/u/ -e s/requestlist/ql/ -e s/type/t/ -e s/class/c/ -e s/opcode/o/ -e s/rcode/r/ -e s/edns/e/ -e s/mem/m/ -e s/cache/c/ -e s/mod/m/"
116 # get value from $1 into return variable $value
118 value="`grep '^'$1'=' $state | sed -e 's/^.*=//'`"
119 if test "$value"x = ""x; then
124 # download the state from the unbound server.
126 # obtain lock for fetching the state
127 # because there is a race condition in fetching and writing to file
129 # see if the lock is stale, if so, take it
130 if test -f $lock ; then
131 pid="`cat $lock 2>&1`"
132 kill -0 "$pid" >/dev/null 2>&1
133 if test $? -ne 0 -a "$pid" != $$ ; then
139 while test ! -f $lock || test "`cat $lock 2>&1`" != $$; do
140 while test -f $lock; do
143 if test $i -gt 1000; then
146 if test $i -gt 1500; then
147 echo "error locking $lock" "=" `cat $lock`
155 # do not refetch if the file exists and is less than $lee seconds old
156 if test -f $state; then
159 value="`echo $value | sed -e 's/\..*$//'`"
160 if test $now -lt `expr $value + $lee`; then
165 $ctrl -c $conf stats > $state
166 if test $? -ne 0; then
167 echo "error retrieving data from unbound server"
174 if test "$1" = "autoconf" ; then
175 if test ! -f $conf; then
176 echo no "($conf does not exist)"
179 if test ! -d `dirname $state`; then
180 echo no "(`dirname $state` directory does not exist)"
187 if test "$1" = "suggest" ; then
200 # determine my type, by name
201 id=`echo $0 | sed -e 's/^.*unbound_munin_//'`
202 if test "$id"x = ""x; then
203 # some default to keep people sane.
207 # if $1 exists in statefile, config is echoed with label $2
209 mn=`echo $1 | sed $ABBREV | tr . _`
210 if grep '^'$1'=' $state >/dev/null 2>&1; then
216 # print label and min 0 for a name $1 in unbound format
218 mn=`echo $1 | sed $ABBREV | tr . _`
223 if test "$1" = "config" ; then
224 if test ! -f $state; then
229 echo "graph_title Unbound DNS traffic and cache hits"
230 echo "graph_args --base 1000 -l 0"
231 echo "graph_vlabel queries / second"
232 echo "graph_category DNS"
233 for x in `grep "^thread[0-9][0-9]*\.num\.queries=" $state |
234 sed -e 's/=.*//'`; do
235 exist_config $x "queries handled by `basename $x .num.queries`"
237 p_config "total.num.queries" "total queries from clients"
238 p_config "total.num.cachehits" "cache hits"
239 p_config "total.num.prefetch" "cache prefetch"
240 p_config "num.query.tcp" "TCP queries"
241 p_config "num.query.ipv6" "IPv6 queries"
242 p_config "unwanted.queries" "queries that failed acl"
243 p_config "unwanted.replies" "unwanted or unsolicited replies"
244 echo "u_replies.warning $warn"
245 echo "u_replies.critical $crit"
246 echo "graph_info DNS queries to the recursive resolver. The unwanted replies could be innocent duplicate packets, late replies, or spoof threats."
# Graph definition for the request list (queue) graph.
# Order of output: graph-level attributes, then one p_config call per
# statistic (unbound statistic name, label text), then the description.
249 echo "graph_title Unbound requestlist size"
250 echo "graph_args --base 1000 -l 0"
251 echo "graph_vlabel number of queries"
252 echo "graph_category DNS"
253 p_config "total.requestlist.avg" "Average size of queue on insert"
254 p_config "total.requestlist.max" "Max size of queue (in 5 min)"
255 p_config "total.requestlist.overwritten" "Number of queries replaced by new ones"
256 p_config "total.requestlist.exceeded" "Number of queries dropped due to lack of space"
257 echo "graph_info The queries that did not hit the cache and need recursion service take up space in the requestlist. If there are too many queries, first queries get overwritten, and at last resort dropped."
# Graph definition for the memory usage graph.
# Unlike the query graphs this one uses --base 1024, since the values are
# byte counts (see graph_vlabel).
# Each p_config call takes the unbound statistic name and its label text.
260 echo "graph_title Unbound memory usage"
261 echo "graph_args --base 1024 -l 0"
262 echo "graph_vlabel memory used in bytes"
263 echo "graph_category DNS"
264 p_config "mem.total.sbrk" "Total memory"
265 p_config "mem.cache.rrset" "RRset cache memory"
266 p_config "mem.cache.message" "Message cache memory"
267 p_config "mem.mod.iterator" "Iterator module memory"
268 p_config "mem.mod.validator" "Validator module and key cache memory"
269 echo "graph_info The memory used by unbound."
272 echo "graph_title Unbound DNS queries by type"
273 echo "graph_args --base 1000 -l 0"
274 echo "graph_vlabel queries / second"
275 echo "graph_category DNS"
276 for x in `grep "^num.query.type" $state`; do
277 nm=`echo $x | sed -e 's/=.*$//'`
278 tp=`echo $nm | sed -e s/num.query.type.//`
281 echo "graph_info queries by DNS RR type queried for"
284 echo "graph_title Unbound DNS queries by class"
285 echo "graph_args --base 1000 -l 0"
286 echo "graph_vlabel queries / second"
287 echo "graph_category DNS"
288 for x in `grep "^num.query.class" $state`; do
289 nm=`echo $x | sed -e 's/=.*$//'`
290 tp=`echo $nm | sed -e s/num.query.class.//`
293 echo "graph_info queries by DNS RR class queried for."
296 echo "graph_title Unbound DNS queries by opcode"
297 echo "graph_args --base 1000 -l 0"
298 echo "graph_vlabel queries / second"
299 echo "graph_category DNS"
300 for x in `grep "^num.query.opcode" $state`; do
301 nm=`echo $x | sed -e 's/=.*$//'`
302 tp=`echo $nm | sed -e s/num.query.opcode.//`
305 echo "graph_info queries by opcode in the query packet."
308 echo "graph_title Unbound DNS answers by return code"
309 echo "graph_args --base 1000 -l 0"
310 echo "graph_vlabel answer packets / second"
311 echo "graph_category DNS"
312 for x in `grep "^num.answer.rcode" $state`; do
313 nm=`echo $x | sed -e 's/=.*$//'`
314 tp=`echo $nm | sed -e s/num.answer.rcode.//`
317 p_config "num.answer.secure" "answer secure"
318 p_config "num.answer.bogus" "answer bogus"
319 p_config "num.rrset.bogus" "num rrsets marked bogus"
320 echo "graph_info answers sorted by return value. rrsets bogus is the number of rrsets marked bogus per second by the validator"
# Graph definition for the "incoming queries by flags" graph.
# One field per DNS header flag (num.query.flags.*) plus two EDNS counters
# (num.query.edns.*). Each p_config call takes the unbound statistic name
# and its label text. The closing graph_info line tells the reader which
# flags are suspicious in a query and which are legitimate.
323 echo "graph_title Unbound DNS incoming queries by flags"
324 echo "graph_args --base 1000 -l 0"
325 echo "graph_vlabel queries / second"
326 echo "graph_category DNS"
327 p_config "num.query.flags.QR" "QR (query reply) flag"
328 p_config "num.query.flags.AA" "AA (auth answer) flag"
329 p_config "num.query.flags.TC" "TC (truncated) flag"
330 p_config "num.query.flags.RD" "RD (recursion desired) flag"
331 p_config "num.query.flags.RA" "RA (rec avail) flag"
332 p_config "num.query.flags.Z" "Z (zero) flag"
333 p_config "num.query.flags.AD" "AD (auth data) flag"
334 p_config "num.query.flags.CD" "CD (check disabled) flag"
335 p_config "num.query.edns.present" "EDNS OPT present"
336 p_config "num.query.edns.DO" "DO (DNSSEC OK) flag"
337 echo "graph_info This graphs plots the flags inside incoming queries. For example, if QR, AA, TC, RA, Z flags are set, the query can be rejected. RD, AD, CD and DO are legitimately set by some software."
340 echo "graph_title Unbound DNS histogram of reply time"
341 echo "graph_args --base 1000 -l 0"
342 echo "graph_vlabel queries / second"
343 echo "graph_category DNS"
344 echo hcache.label "cache hits"
346 echo hcache.draw AREA
347 echo hcache.colour 999999
348 echo h64ms.label "0 msec - 66 msec"
350 echo h64ms.draw STACK
351 echo h64ms.colour 0000FF
352 echo h128ms.label "66 msec - 131 msec"
354 echo h128ms.colour 1F00DF
355 echo h128ms.draw STACK
356 echo h256ms.label "131 msec - 262 msec"
358 echo h256ms.draw STACK
359 echo h256ms.colour 3F00BF
360 echo h512ms.label "262 msec - 524 msec"
362 echo h512ms.draw STACK
363 echo h512ms.colour 5F009F
364 echo h1s.label "524 msec - 1 sec"
367 echo h1s.colour 7F007F
368 echo h2s.label "1 sec - 2 sec"
371 echo h2s.colour 9F005F
372 echo h4s.label "2 sec - 4 sec"
375 echo h4s.colour BF003F
376 echo h8s.label "4 sec - 8 sec"
379 echo h8s.colour DF001F
380 echo h16s.label "8 sec - ..."
383 echo h16s.colour FF0000
384 echo "graph_info Histogram of the reply times for queries."
391 # do the stats itself
394 # get the time elapsed
395 get_value "time.elapsed"
396 if test $value = 0 || test $value = "0.000000"; then
397 echo "error: time elapsed 0 or could not retrieve data"
402 # print value for $1 / elapsed
404 mn=`echo $1 | sed $ABBREV | tr . _`
406 echo "$mn.value" `echo scale=6';' $value / $elapsed | bc `
409 # print qps for name $1, taking the value from the already-fetched statefile line $2
411 mn=`echo $1 | sed $ABBREV | tr . _`
412 value="`echo $2 | sed -e 's/^.*=//'`"
413 echo "$mn.value" `echo scale=6';' $value / $elapsed | bc `
418 mn=`echo $1 | sed $ABBREV | tr . _`
420 echo "$mn.value" $value
425 for x in `grep "^thread[0-9][0-9]*\.num\.queries=" $state |
426 sed -e 's/=.*//'` total.num.queries \
427 total.num.cachehits total.num.prefetch num.query.tcp \
428 num.query.ipv6 unwanted.queries unwanted.replies; do
429 if grep "^"$x"=" $state >/dev/null 2>&1; then
435 for x in total.requestlist.avg total.requestlist.max \
436 total.requestlist.overwritten total.requestlist.exceeded; do
441 mn=`echo mem.total.sbrk | sed $ABBREV | tr . _`
442 get_value 'mem.total.sbrk'
443 if test $value -eq 0; then
444 chk=`echo $ctrl | sed -e 's/-control$/-checkconf/'`
445 pidf=`$chk -o pidfile $conf 2>&1`
447 value=`ps -p "$pid" -o rss= 2>&1`
448 if test "`expr $value + 1 - 1 2>&1`" -eq "$value" 2>&1; then
449 value=`expr $value \* 1024`
454 echo "$mn.value" $value
455 for x in mem.cache.rrset mem.cache.message \
456 mem.mod.iterator mem.mod.validator; do
461 for x in `grep "^num.query.type" $state`; do
462 nm=`echo $x | sed -e 's/=.*$//'`
463 print_qps_line $nm $x
467 for x in `grep "^num.query.class" $state`; do
468 nm=`echo $x | sed -e 's/=.*$//'`
469 print_qps_line $nm $x
473 for x in `grep "^num.query.opcode" $state`; do
474 nm=`echo $x | sed -e 's/=.*$//'`
475 print_qps_line $nm $x
479 for x in `grep "^num.answer.rcode" $state`; do
480 nm=`echo $x | sed -e 's/=.*$//'`
481 print_qps_line $nm $x
483 print_qps "num.answer.secure"
484 print_qps "num.answer.bogus"
485 print_qps "num.rrset.bogus"
488 for x in num.query.flags.QR num.query.flags.AA num.query.flags.TC num.query.flags.RD num.query.flags.RA num.query.flags.Z num.query.flags.AD num.query.flags.CD num.query.edns.present num.query.edns.DO; do
493 get_value total.num.cachehits
494 echo hcache.value `echo scale=6';' $value / $elapsed | bc `
496 for x in histogram.000000.000000.to.000000.000001 \
497 histogram.000000.000001.to.000000.000002 \
498 histogram.000000.000002.to.000000.000004 \
499 histogram.000000.000004.to.000000.000008 \
500 histogram.000000.000008.to.000000.000016 \
501 histogram.000000.000016.to.000000.000032 \
502 histogram.000000.000032.to.000000.000064 \
503 histogram.000000.000064.to.000000.000128 \
504 histogram.000000.000128.to.000000.000256 \
505 histogram.000000.000256.to.000000.000512 \
506 histogram.000000.000512.to.000000.001024 \
507 histogram.000000.001024.to.000000.002048 \
508 histogram.000000.002048.to.000000.004096 \
509 histogram.000000.004096.to.000000.008192 \
510 histogram.000000.008192.to.000000.016384 \
511 histogram.000000.016384.to.000000.032768 \
512 histogram.000000.032768.to.000000.065536; do
516 echo h64ms.value `echo scale=6';' $r / $elapsed | bc `
# Reply-time buckets that map one-to-one onto a graph field (h128ms .. h8s).
# get_value leaves the bucket's count in $value; bc divides it by $elapsed
# with 6 decimal places (scale=6).
# NOTE(review): $elapsed presumably holds the time.elapsed statistic in
# seconds, which would make each value queries per second as the graph's
# vlabel states - confirm where $elapsed is assigned.
# Bucket bounds read as seconds.microseconds, e.g. 000000.065536 is about
# 66 msec, matching the field labels in the graph config.
517 get_value histogram.000000.065536.to.000000.131072
518 echo h128ms.value `echo scale=6';' $value / $elapsed | bc `
519 get_value histogram.000000.131072.to.000000.262144
520 echo h256ms.value `echo scale=6';' $value / $elapsed | bc `
521 get_value histogram.000000.262144.to.000000.524288
522 echo h512ms.value `echo scale=6';' $value / $elapsed | bc `
523 get_value histogram.000000.524288.to.000001.000000
524 echo h1s.value `echo scale=6';' $value / $elapsed | bc `
525 get_value histogram.000001.000000.to.000002.000000
526 echo h2s.value `echo scale=6';' $value / $elapsed | bc `
527 get_value histogram.000002.000000.to.000004.000000
528 echo h4s.value `echo scale=6';' $value / $elapsed | bc `
529 get_value histogram.000004.000000.to.000008.000000
530 echo h8s.value `echo scale=6';' $value / $elapsed | bc `
532 for x in histogram.000008.000000.to.000016.000000 \
533 histogram.000016.000000.to.000032.000000 \
534 histogram.000032.000000.to.000064.000000 \
535 histogram.000064.000000.to.000128.000000 \
536 histogram.000128.000000.to.000256.000000 \
537 histogram.000256.000000.to.000512.000000 \
538 histogram.000512.000000.to.001024.000000 \
539 histogram.001024.000000.to.002048.000000 \
540 histogram.002048.000000.to.004096.000000 \
541 histogram.004096.000000.to.008192.000000 \
542 histogram.008192.000000.to.016384.000000 \
543 histogram.016384.000000.to.032768.000000 \
544 histogram.032768.000000.to.065536.000000 \
545 histogram.065536.000000.to.131072.000000 \
546 histogram.131072.000000.to.262144.000000 \
547 histogram.262144.000000.to.524288.000000; do
551 echo h16s.value `echo scale=6';' $r / $elapsed | bc `