]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/unbound_munin_
import unbound 1.4.22
[FreeBSD/FreeBSD.git] / contrib / unbound_munin_
1 #!/bin/sh
2 #
3 # plugin for munin to monitor usage of unbound servers.
4 # To install copy this to /usr/local/share/munin/plugins/unbound_munin_
5 # and use munin-node-configure (--suggest, --shell).
6 #
7 # (C) 2008 W.C.A. Wijngaards.  BSD Licensed.
8 #
9 # To install; enable statistics and unbound-control in unbound.conf
10 #       server:         extended-statistics: yes
11 #                       statistics-cumulative: no
12 #                       statistics-interval: 0
13 #       remote-control: control-enable: yes
14 # Run the command unbound-control-setup to generate the key files.
15 #
16 # Environment variables for this script
17 #       statefile       - where to put temporary statefile.
18 #       unbound_conf    - where the unbound.conf file is located.
19 #       unbound_control - where to find unbound-control executable.
20 #       spoof_warn      - what level to warn about spoofing
21 #       spoof_crit      - what level to crit about spoofing
22 #
23 # You can set them in your munin/plugin-conf.d/plugins.conf file
24 # with:
25 # [unbound*]
26 # user root
27 # env.statefile /usr/local/var/munin/plugin-state/unbound-state
28 # env.unbound_conf /usr/local/etc/unbound/unbound.conf
29 # env.unbound_control /usr/local/sbin/unbound-control
30 # env.spoof_warn 1000
31 # env.spoof_crit 100000
32 #
33 # This plugin can create different graphs depending on what name
34 # you link it as (with ln -s) into the plugins directory
35 # You can link it multiple times.
36 # If you are only a casual user, the _hits and _by_type are most interesting,
37 # possibly followed by _by_rcode.
38 #
39 #       unbound_munin_hits      - base volume, cache hits, unwanted traffic
40 #       unbound_munin_queue     - to monitor the internal requestlist
41 #       unbound_munin_memory    - memory usage
42 #       unbound_munin_by_type   - incoming queries by type
43 #       unbound_munin_by_class  - incoming queries by class
44 #       unbound_munin_by_opcode - incoming queries by opcode
45 #       unbound_munin_by_rcode  - answers by rcode, validation status
46 #       unbound_munin_by_flags  - incoming queries by flags
47 #       unbound_munin_histogram - histogram of query resolving times
48 #
49 # Magic markers - optional - used by installation scripts and
50 # munin-config:  (originally contrib family but munin-node-configure ignores it)
51 #
52 #%# family=auto
53 #%# capabilities=autoconf suggest
54
55 # POD documentation
56 : <<=cut
57 =head1 NAME
58
59 unbound_munin_ - Munin plugin to monitor the Unbound DNS resolver.
60
61 =head1 APPLICABLE SYSTEMS
62
63 System with unbound daemon.
64
65 =head1 CONFIGURATION
66
67   [unbound*]
68   user root
69   env.statefile /usr/local/var/munin/plugin-state/unbound-state
70   env.unbound_conf /usr/local/etc/unbound/unbound.conf
71   env.unbound_control /usr/local/sbin/unbound-control
72   env.spoof_warn 1000
73   env.spoof_crit 100000
74
75 Use the .env settings to override the defaults.
76
77 =head1 USAGE
78
79 Can be used to present different graphs. Use ln -s for that name in
80 the plugins directory to enable the graph.
81 unbound_munin_hits      - base volume, cache hits, unwanted traffic
82 unbound_munin_queue     - to monitor the internal requestlist
83 unbound_munin_memory    - memory usage
84 unbound_munin_by_type   - incoming queries by type
85 unbound_munin_by_class  - incoming queries by class
86 unbound_munin_by_opcode - incoming queries by opcode
87 unbound_munin_by_rcode  - answers by rcode, validation status
88 unbound_munin_by_flags  - incoming queries by flags
89 unbound_munin_histogram - histogram of query resolving times
90
91 =head1 AUTHOR
92
93 Copyright 2008 W.C.A. Wijngaards
94
95 =head1 LICENSE
96
97 BSD
98
99 =cut
100
# Settings. Each one can be overridden through the environment variable
# of the same purpose (statefile, unbound_conf, unbound_control,
# spoof_warn, spoof_crit); the values below are the defaults.
state=${statefile:-/usr/local/var/munin/plugin-state/unbound-state}
conf=${unbound_conf:-/usr/local/etc/unbound/unbound.conf}
ctrl=${unbound_control:-/usr/local/sbin/unbound-control}
warn=${spoof_warn:-1000}
crit=${spoof_crit:-100000}
# lock file that serializes fetching of the statefile
lock=$state.lock

# number of seconds between polling attempts.
# makes the statefile hang around for at least this many seconds,
# so that multiple links of this script can share the results.
lee=55

# to keep things within 19 characters
# (a list of sed arguments: it is expanded unquoted on purpose)
ABBREV="-e s/total/t/ -e s/thread/t/ -e s/num/n/ -e s/query/q/ -e s/answer/a/ -e s/unwanted/u/ -e s/requestlist/ql/ -e s/type/t/ -e s/class/c/ -e s/opcode/o/ -e s/rcode/r/ -e s/edns/e/ -e s/mem/m/ -e s/cache/c/ -e s/mod/m/"
115
# get value from $1 into return variable $value
#
# $1 is the exact name of a statistic as written in the statefile
# ("name=value" lines). The name is compared literally, so the dots in
# it are not wildcards, and only the first matching line is used.
# $value is set to "0" when the statefile is unreadable, the name is
# absent, or its value is empty.
get_value ( ) {
        value=""
        if test -r "$state"; then
                # the "|| test -n" part keeps a last line without newline
                while IFS= read -r gv_line || test -n "$gv_line"; do
                        case $gv_line in
                        "$1="*)
                                # everything after the last '='
                                value=${gv_line##*=}
                                break
                                ;;
                        esac
                done < "$state"
        fi
        if test -z "$value"; then
                value="0"
        fi
}
123
# download the state from the unbound server.
#
# Reads the globals $lock, $state, $lee, $ctrl and $conf.
# On success the statefile holds the output of "$ctrl -c $conf stats",
# or is left alone when it is less than $lee seconds old.
# On failure a message is printed and the script exits with status 1;
# the previous statefile, if any, is left untouched.
get_state ( ) {
        # obtain lock for fetching the state
        # because there is a race condition in fetching and writing to file

        # see if the lock is stale (its owner is not running), if so, take it
        if test -f "$lock" ; then
                pid=$(cat "$lock" 2>&1)
                if ! kill -0 "$pid" >/dev/null 2>&1 && test "$pid" != "$$" ; then
                        echo $$ >"$lock"
                fi
        fi

        i=0
        # loop until the lock file exists and holds our own pid
        while test ! -f "$lock" || test "$(cat "$lock" 2>&1)" != "$$"; do
                while test -f "$lock"; do
                        # wait: spin for the first 1000 rounds, then sleep
                        # one second per round, and give up after 1500.
                        i=$((i + 1))
                        if test "$i" -gt 1000; then
                                sleep 1
                        fi
                        if test "$i" -gt 1500; then
                                echo "error locking $lock" "=" $(cat "$lock")
                                rm -f "$lock"
                                exit 1
                        fi
                done
                # try to get it
                echo $$ >"$lock"
        done
        # do not refetch if the file exists and only LEE seconds old
        if test -f "$state"; then
                now=$(date +%s)
                get_value "time.now"
                # keep the integer part of the timestamp
                value=${value%%.*}
                # a damaged statefile must lead to a refetch, not to a
                # shell arithmetic error
                case $value in
                ''|*[!0-9]*) value=0 ;;
                esac
                if test "$now" -lt "$((value + lee))"; then
                        rm -f "$lock"
                        return
                fi
        fi
        # Fetch into a temporary file and rename it into place, so that
        # the statefile is never seen half written and a failed fetch
        # does not destroy the previous one.
        # $ctrl is expanded unquoted, as before, so it is still split
        # into words when it is set to a command with arguments.
        gs_tmp="$state.$$"
        if $ctrl -c "$conf" stats >"$gs_tmp" && mv -f "$gs_tmp" "$state"; then
                rm -f "$lock"
                return
        fi
        echo "error retrieving data from unbound server"
        rm -f "$gs_tmp" "$lock"
        exit 1
}
173
# munin "autoconf": report whether this plugin can work on this host.
# The answer is the printed word (yes, or no with a reason). The exit
# status is 0 for both answers; a non-zero status would be taken as a
# plugin error rather than as the answer "no".
if test "$1" = "autoconf" ; then
        if test ! -f "$conf"; then
                echo no "($conf does not exist)"
                exit 0
        fi
        statedir=$(dirname "$state")
        if test ! -d "$statedir"; then
                echo no "($statedir directory does not exist)"
                exit 0
        fi
        echo yes
        exit 0
fi
186
# munin "suggest": print the graph names, one per line, that can be
# appended to unbound_munin_ when linking this plugin.
if test "$1" = "suggest" ; then
        for graph in hits queue memory by_type by_class by_opcode \
                by_rcode by_flags histogram; do
                echo "$graph"
        done
        exit 0
fi
199
# determine my type, by name: whatever follows the last "unbound_munin_"
# in the name this script was started as.
case $0 in
*unbound_munin_*)
        id=${0##*unbound_munin_}
        ;;
*)
        # started under a name without the prefix: no type in it
        id=""
        ;;
esac
if test -z "$id"; then
        # some default to keep people sane.
        id="hits"
fi
206
# if $1 exists in statefile, config is echoed with label $2
#
# $1 is the exact statistic name (compared literally, dots are not
# wildcards); the munin field name is $1 shortened with $ABBREV and
# with dots turned into underscores. Prints nothing when the name is
# absent or the statefile is unreadable. Always returns 0.
exist_config ( ) {
        # $ABBREV is a list of sed arguments: unquoted on purpose
        mn=$(printf '%s\n' "$1" | sed $ABBREV | tr . _)
        test -r "$state" || return 0
        while IFS= read -r ec_line || test -n "$ec_line"; do
                case $ec_line in
                "$1="*)
                        echo "$mn.label $2"
                        echo "$mn.min 0"
                        return 0
                        ;;
                esac
        done < "$state"
        return 0
}
215
# print label and min 0 for a name $1 in unbound format
#
# $1 is the statistic name, $2 the label text. The munin field name is
# $1 shortened with $ABBREV and with dots turned into underscores.
p_config ( ) {
        # $ABBREV is a list of sed arguments: unquoted on purpose
        mn=$(printf '%s\n' "$1" | sed $ABBREV | tr . _)
        echo "$mn.label" "$2"
        echo "$mn.min" 0
}
222
# print label and min 0 for every statistic in the statefile whose name
# starts with the literal prefix $1; the label is the rest of the name.
p_config_prefix ( ) {
        while IFS= read -r pc_line || test -n "$pc_line"; do
                case $pc_line in
                "$1"*=*)
                        pc_name=${pc_line%%=*}
                        pc_label=${pc_name#"$1"}
                        p_config "$pc_name" "$pc_label"
                        ;;
                esac
        done < "$state"
}

# munin "config": describe the graph selected by $id, then exit.
# The statefile is only fetched when there is none yet; the by_* graphs
# and the per-thread lines take their field lists from it.
if test "$1" = "config" ; then
        if test ! -f "$state"; then
                get_state
        fi
        case $id in
        hits)
                echo "graph_title Unbound DNS traffic and cache hits"
                echo "graph_args --base 1000 -l 0"
                echo "graph_vlabel queries / second"
                echo "graph_category DNS"
                for x in $(grep "^thread[0-9][0-9]*\.num\.queries=" "$state" |
                        sed -e 's/=.*//'); do
                        exist_config "$x" "queries handled by ${x%.num.queries}"
                done
                p_config "total.num.queries" "total queries from clients"
                p_config "total.num.cachehits" "cache hits"
                p_config "total.num.prefetch" "cache prefetch"
                p_config "num.query.tcp" "TCP queries"
                p_config "num.query.ipv6" "IPv6 queries"
                p_config "unwanted.queries" "queries that failed acl"
                p_config "unwanted.replies" "unwanted or unsolicited replies"
                # u_replies is the abbreviated field name of unwanted.replies
                echo "u_replies.warning $warn"
                echo "u_replies.critical $crit"
                echo "graph_info DNS queries to the recursive resolver. The unwanted replies could be innocent duplicate packets, late replies, or spoof threats."
                ;;
        queue)
                echo "graph_title Unbound requestlist size"
                echo "graph_args --base 1000 -l 0"
                echo "graph_vlabel number of queries"
                echo "graph_category DNS"
                p_config "total.requestlist.avg" "Average size of queue on insert"
                p_config "total.requestlist.max" "Max size of queue (in 5 min)"
                p_config "total.requestlist.overwritten" "Number of queries replaced by new ones"
                p_config "total.requestlist.exceeded" "Number of queries dropped due to lack of space"
                echo "graph_info The queries that did not hit the cache and need recursion service take up space in the requestlist. If there are too many queries, first queries get overwritten, and at last resort dropped."
                ;;
        memory)
                echo "graph_title Unbound memory usage"
                echo "graph_args --base 1024 -l 0"
                echo "graph_vlabel memory used in bytes"
                echo "graph_category DNS"
                p_config "mem.total.sbrk" "Total memory"
                p_config "mem.cache.rrset" "RRset cache memory"
                p_config "mem.cache.message" "Message cache memory"
                p_config "mem.mod.iterator" "Iterator module memory"
                p_config "mem.mod.validator" "Validator module and key cache memory"
                echo "graph_info The memory used by unbound."
                ;;
        by_type)
                echo "graph_title Unbound DNS queries by type"
                echo "graph_args --base 1000 -l 0"
                echo "graph_vlabel queries / second"
                echo "graph_category DNS"
                p_config_prefix "num.query.type."
                echo "graph_info queries by DNS RR type queried for"
                ;;
        by_class)
                echo "graph_title Unbound DNS queries by class"
                echo "graph_args --base 1000 -l 0"
                echo "graph_vlabel queries / second"
                echo "graph_category DNS"
                p_config_prefix "num.query.class."
                echo "graph_info queries by DNS RR class queried for."
                ;;
        by_opcode)
                echo "graph_title Unbound DNS queries by opcode"
                echo "graph_args --base 1000 -l 0"
                echo "graph_vlabel queries / second"
                echo "graph_category DNS"
                p_config_prefix "num.query.opcode."
                echo "graph_info queries by opcode in the query packet."
                ;;
        by_rcode)
                echo "graph_title Unbound DNS answers by return code"
                echo "graph_args --base 1000 -l 0"
                echo "graph_vlabel answer packets / second"
                echo "graph_category DNS"
                p_config_prefix "num.answer.rcode."
                p_config "num.answer.secure" "answer secure"
                p_config "num.answer.bogus" "answer bogus"
                p_config "num.rrset.bogus" "num rrsets marked bogus"
                echo "graph_info answers sorted by return value. rrsets bogus is the number of rrsets marked bogus per second by the validator"
                ;;
        by_flags)
                echo "graph_title Unbound DNS incoming queries by flags"
                echo "graph_args --base 1000 -l 0"
                echo "graph_vlabel queries / second"
                echo "graph_category DNS"
                p_config "num.query.flags.QR" "QR (query reply) flag"
                p_config "num.query.flags.AA" "AA (auth answer) flag"
                p_config "num.query.flags.TC" "TC (truncated) flag"
                p_config "num.query.flags.RD" "RD (recursion desired) flag"
                p_config "num.query.flags.RA" "RA (rec avail) flag"
                p_config "num.query.flags.Z" "Z (zero) flag"
                p_config "num.query.flags.AD" "AD (auth data) flag"
                p_config "num.query.flags.CD" "CD (check disabled) flag"
                p_config "num.query.edns.present" "EDNS OPT present"
                p_config "num.query.edns.DO" "DO (DNSSEC OK) flag"
                echo "graph_info This graphs plots the flags inside incoming queries. For example, if QR, AA, TC, RA, Z flags are set, the query can be rejected. RD, AD, CD and DO are legitimately set by some software."
                ;;
        histogram)
                echo "graph_title Unbound DNS histogram of reply time"
                echo "graph_args --base 1000 -l 0"
                echo "graph_vlabel queries / second"
                echo "graph_category DNS"
                # one area for the cache hits, the time buckets stacked on top
                echo hcache.label "cache hits"
                echo hcache.min 0
                echo hcache.draw AREA
                echo hcache.colour 999999
                echo h64ms.label "0 msec - 66 msec"
                echo h64ms.min 0
                echo h64ms.draw STACK
                echo h64ms.colour 0000FF
                echo h128ms.label "66 msec - 131 msec"
                echo h128ms.min 0
                echo h128ms.colour 1F00DF
                echo h128ms.draw STACK
                echo h256ms.label "131 msec - 262 msec"
                echo h256ms.min 0
                echo h256ms.draw STACK
                echo h256ms.colour 3F00BF
                echo h512ms.label "262 msec - 524 msec"
                echo h512ms.min 0
                echo h512ms.draw STACK
                echo h512ms.colour 5F009F
                echo h1s.label "524 msec - 1 sec"
                echo h1s.min 0
                echo h1s.draw STACK
                echo h1s.colour 7F007F
                echo h2s.label "1 sec - 2 sec"
                echo h2s.min 0
                echo h2s.draw STACK
                echo h2s.colour 9F005F
                echo h4s.label "2 sec - 4 sec"
                echo h4s.min 0
                echo h4s.draw STACK
                echo h4s.colour BF003F
                echo h8s.label "4 sec - 8 sec"
                echo h8s.min 0
                echo h8s.draw STACK
                echo h8s.colour DF001F
                echo h16s.label "8 sec - ..."
                echo h16s.min 0
                echo h16s.draw STACK
                echo h16s.colour FF0000
                echo "graph_info Histogram of the reply times for queries."
                ;;
        esac

        exit 0
fi
390
# do the stats itself
get_state

# get the time elapsed
get_value "time.elapsed"
# The rates printed below are divided by this value. A value without
# any non-zero digit (0, 0.0, 0.000000, or the "0" that get_value gives
# for a missing entry) cannot be used as a divisor.
case $value in
*[1-9]*)
        ;;
*)
        echo "error: time elapsed 0 or could not retrieve data"
        exit 1
        ;;
esac
elapsed="$value"
401
# print value for $1 / elapsed
#
# $1 is a statistic name; its value is looked up with get_value and
# divided by $elapsed with bc at six decimals. The output line is
# "<field>.value <rate>", where <field> is $1 shortened with $ABBREV
# and with dots turned into underscores.
print_qps ( ) {
        # $ABBREV is a list of sed arguments: unquoted on purpose
        mn=$(printf '%s\n' "$1" | sed $ABBREV | tr . _)
        get_value "$1"
        echo "$mn.value" $(echo "scale=6; $value / $elapsed" | bc)
}
408
# print qps if line already found in $2
#
# $1 is the statistic name, $2 the complete "name=value" line taken
# from the statefile. The value (the text after the last '=') is
# divided by $elapsed with bc at six decimals and printed as
# "<field>.value <rate>".
print_qps_line ( ) {
        # $ABBREV is a list of sed arguments: unquoted on purpose
        mn=$(printf '%s\n' "$1" | sed $ABBREV | tr . _)
        value=${2##*=}
        echo "$mn.value" $(echo "scale=6; $value / $elapsed" | bc)
}
415
# print value for $1
#
# $1 is a statistic name; its value is looked up with get_value and
# printed unchanged as "<field>.value <value>", where <field> is $1
# shortened with $ABBREV and with dots turned into underscores.
print_value ( ) {
        # $ABBREV is a list of sed arguments: unquoted on purpose
        mn=$(printf '%s\n' "$1" | sed $ABBREV | tr . _)
        get_value "$1"
        echo "$mn.value" "$value"
}
422
# succeed when statistic $1 is present in the statefile (the name is
# compared literally, dots are not wildcards)
stat_exists ( ) {
        while IFS= read -r se_line || test -n "$se_line"; do
                case $se_line in
                "$1="*) return 0 ;;
                esac
        done < "$state"
        return 1
}

# print the qps of every statistic in the statefile whose name starts
# with the literal prefix $1
print_qps_prefix ( ) {
        while IFS= read -r pq_line || test -n "$pq_line"; do
                case $pq_line in
                "$1"*=*) print_qps_line "${pq_line%%=*}" "$pq_line" ;;
                esac
        done < "$state"
}

# print "$1.value" followed by $2 divided by the elapsed time
print_rate ( ) {
        echo "$1.value" $(echo "scale=6; $2 / $elapsed" | bc)
}

# add up the values of the statistics named by the arguments, into $r
sum_values ( ) {
        r=0
        for sv_name in "$@"; do
                get_value "$sv_name"
                r=$((r + value))
        done
}

# print the values of the graph selected by $id
case $id in
hits)
        # per-thread counters found in the statefile, then the totals;
        # names that are absent from the statefile are skipped.
        for x in $(grep "^thread[0-9][0-9]*\.num\.queries=" "$state" |
                sed -e 's/=.*//') total.num.queries \
                total.num.cachehits total.num.prefetch num.query.tcp \
                num.query.ipv6 unwanted.queries unwanted.replies; do
                if stat_exists "$x"; then
                        print_qps "$x"
                fi
        done
        ;;
queue)
        for x in total.requestlist.avg total.requestlist.max \
                total.requestlist.overwritten total.requestlist.exceeded; do
                print_value "$x"
        done
        ;;
memory)
        mn=$(echo mem.total.sbrk | sed $ABBREV | tr . _)
        get_value 'mem.total.sbrk'
        if test "$value" -eq 0; then
                # no sbrk figure in the statistics: use the rss that ps
                # reports for the pid in unbound's pidfile, times 1024.
                # The checkconf tool is looked up next to $ctrl.
                chk=$(echo "$ctrl" | sed -e 's/-control$/-checkconf/')
                pidf=$($chk -o pidfile "$conf" 2>&1)
                pid=$(cat "$pidf" 2>&1)
                value=$(ps -p "$pid" -o rss= 2>/dev/null | tr -d ' ')
                case $value in
                ''|*[!0-9]*)
                        # no such process, or no usable number
                        value=0
                        ;;
                *)
                        value=$((value * 1024))
                        ;;
                esac
        fi
        echo "$mn.value" "$value"
        for x in mem.cache.rrset mem.cache.message \
                mem.mod.iterator mem.mod.validator; do
                print_value "$x"
        done
        ;;
by_type)
        print_qps_prefix "num.query.type."
        ;;
by_class)
        print_qps_prefix "num.query.class."
        ;;
by_opcode)
        print_qps_prefix "num.query.opcode."
        ;;
by_rcode)
        print_qps_prefix "num.answer.rcode."
        print_qps "num.answer.secure"
        print_qps "num.answer.bogus"
        print_qps "num.rrset.bogus"
        ;;
by_flags)
        for x in num.query.flags.QR num.query.flags.AA num.query.flags.TC \
                num.query.flags.RD num.query.flags.RA num.query.flags.Z \
                num.query.flags.AD num.query.flags.CD \
                num.query.edns.present num.query.edns.DO; do
                print_qps "$x"
        done
        ;;
histogram)
        get_value total.num.cachehits
        print_rate hcache "$value"
        # all buckets below 0.065536 sec are added up into h64ms
        sum_values histogram.000000.000000.to.000000.000001 \
                histogram.000000.000001.to.000000.000002 \
                histogram.000000.000002.to.000000.000004 \
                histogram.000000.000004.to.000000.000008 \
                histogram.000000.000008.to.000000.000016 \
                histogram.000000.000016.to.000000.000032 \
                histogram.000000.000032.to.000000.000064 \
                histogram.000000.000064.to.000000.000128 \
                histogram.000000.000128.to.000000.000256 \
                histogram.000000.000256.to.000000.000512 \
                histogram.000000.000512.to.000000.001024 \
                histogram.000000.001024.to.000000.002048 \
                histogram.000000.002048.to.000000.004096 \
                histogram.000000.004096.to.000000.008192 \
                histogram.000000.008192.to.000000.016384 \
                histogram.000000.016384.to.000000.032768 \
                histogram.000000.032768.to.000000.065536
        print_rate h64ms "$r"
        get_value histogram.000000.065536.to.000000.131072
        print_rate h128ms "$value"
        get_value histogram.000000.131072.to.000000.262144
        print_rate h256ms "$value"
        get_value histogram.000000.262144.to.000000.524288
        print_rate h512ms "$value"
        get_value histogram.000000.524288.to.000001.000000
        print_rate h1s "$value"
        get_value histogram.000001.000000.to.000002.000000
        print_rate h2s "$value"
        get_value histogram.000002.000000.to.000004.000000
        print_rate h4s "$value"
        get_value histogram.000004.000000.to.000008.000000
        print_rate h8s "$value"
        # all buckets from 8 sec upwards are added up into h16s
        sum_values histogram.000008.000000.to.000016.000000 \
                histogram.000016.000000.to.000032.000000 \
                histogram.000032.000000.to.000064.000000 \
                histogram.000064.000000.to.000128.000000 \
                histogram.000128.000000.to.000256.000000 \
                histogram.000256.000000.to.000512.000000 \
                histogram.000512.000000.to.001024.000000 \
                histogram.001024.000000.to.002048.000000 \
                histogram.002048.000000.to.004096.000000 \
                histogram.004096.000000.to.008192.000000 \
                histogram.008192.000000.to.016384.000000 \
                histogram.016384.000000.to.032768.000000 \
                histogram.032768.000000.to.065536.000000 \
                histogram.065536.000000.to.131072.000000 \
                histogram.131072.000000.to.262144.000000 \
                histogram.262144.000000.to.524288.000000
        print_rate h16s "$r"
        ;;
esac