]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/unbound/contrib/unbound_munin_
Upgrade LDNS to 1.7.0.
[FreeBSD/FreeBSD.git] / contrib / unbound / contrib / unbound_munin_
1 #!/bin/sh
2 #
3 # plugin for munin to monitor usage of unbound servers.
4 # To install copy this to /usr/local/share/munin/plugins/unbound_munin_
5 # and use munin-node-configure (--suggest, --shell).
6 #
7 # (C) 2008 W.C.A. Wijngaards.  BSD Licensed.
8 #
9 # To install; enable statistics and unbound-control in unbound.conf
10 #       server:         extended-statistics: yes
11 #                       statistics-cumulative: no
12 #                       statistics-interval: 0
13 #       remote-control: control-enable: yes
14 # Run the command unbound-control-setup to generate the key files.
15 #
16 # Environment variables for this script
17 #       statefile       - where to put temporary statefile.
18 #       unbound_conf    - where the unbound.conf file is located.
19 #       unbound_control - where to find unbound-control executable.
20 #       spoof_warn      - what level to warn about spoofing
21 #       spoof_crit      - what level to crit about spoofing
22 #
23 # You can set them in your munin/plugin-conf.d/plugins.conf file
24 # with:
25 # [unbound*]
26 # user root
27 # env.statefile /usr/local/var/munin/plugin-state/unbound-state
28 # env.unbound_conf /usr/local/etc/unbound/unbound.conf
29 # env.unbound_control /usr/local/sbin/unbound-control
30 # env.spoof_warn 1000
31 # env.spoof_crit 100000
32 #
33 # This plugin can create different graphs depending on what name
34 # you link it as (with ln -s) into the plugins directory
35 # You can link it multiple times.
36 # If you are only a casual user, the _hits and _by_type are most interesting,
37 # possibly followed by _by_rcode.
38 #
39 #       unbound_munin_hits      - base volume, cache hits, unwanted traffic
40 #       unbound_munin_queue     - to monitor the internal requestlist
41 #       unbound_munin_memory    - memory usage
42 #       unbound_munin_by_type   - incoming queries by type
43 #       unbound_munin_by_class  - incoming queries by class
44 #       unbound_munin_by_opcode - incoming queries by opcode
45 #       unbound_munin_by_rcode  - answers by rcode, validation status
46 #       unbound_munin_by_flags  - incoming queries by flags
47 #       unbound_munin_histogram - histogram of query resolving times
48 #
49 # Magic markers - optional - used by installation scripts and
50 # munin-config:  (originally contrib family but munin-node-configure ignores it)
51 #
52 #%# family=auto
53 #%# capabilities=autoconf suggest
54
55 # POD documentation
56 : <<=cut
57 =head1 NAME
58
59 unbound_munin_ - Munin plugin to monitor the Unbound DNS resolver.
60
61 =head1 APPLICABLE SYSTEMS
62
63 System with unbound daemon.
64
65 =head1 CONFIGURATION
66
67   [unbound*]
68   user root
69   env.statefile /usr/local/var/munin/plugin-state/unbound-state
70   env.unbound_conf /usr/local/etc/unbound/unbound.conf
71   env.unbound_control /usr/local/sbin/unbound-control
72   env.spoof_warn 1000
73   env.spoof_crit 100000
74
75 Use the .env settings to override the defaults.
76
77 =head1 USAGE
78
79 Can be used to present different graphs. Use ln -s for that name in
80 the plugins directory to enable the graph.
81 unbound_munin_hits      - base volume, cache hits, unwanted traffic
82 unbound_munin_queue     - to monitor the internal requestlist
83 unbound_munin_memory    - memory usage
84 unbound_munin_by_type   - incoming queries by type
85 unbound_munin_by_class  - incoming queries by class
86 unbound_munin_by_opcode - incoming queries by opcode
87 unbound_munin_by_rcode  - answers by rcode, validation status
88 unbound_munin_by_flags  - incoming queries by flags
89 unbound_munin_histogram - histogram of query resolving times
90
91 =head1 AUTHOR
92
93 Copyright 2008 W.C.A. Wijngaards
94
95 =head1 LICENSE
96
97 BSD
98
99 =cut
100
# Settings; each one may be overridden through the environment variable
# named inside the ${...:-default} expansion.
state="${statefile:-/usr/local/var/munin/plugin-state/unbound-state}"
conf="${unbound_conf:-/usr/local/etc/unbound/unbound.conf}"
ctrl="${unbound_control:-/usr/local/sbin/unbound-control}"
warn="${spoof_warn:-1000}"
crit="${spoof_crit:-100000}"
# lock file guarding the statefile, kept next to it
lock="${state}.lock"

# Minimum number of seconds between two polls of the server.  The statefile
# is reused for at least this long, so that several links of this script
# can share one set of results.
lee=55

# sed arguments that shorten statistic names into munin field names
# (to keep things within 19 characters).  Expanded unquoted on purpose:
# every "-e s/from/to/" pair has to reach sed as two separate words.
ABBREV=""
for pair in total/t thread/t num/n query/q answer/a unwanted/u \
        requestlist/ql type/t class/c opcode/o rcode/r edns/e mem/m \
        cache/c mod/m; do
        ABBREV="${ABBREV:+$ABBREV }-e s/$pair/"
done
unset pair
115
# Look up statistic $1 in the statefile and leave its value in $value.
#
# Arguments: $1 - exact statistic name, e.g. "time.now"
# Globals:   state (read)  - path of the statefile
#            value (write) - text after "<name>=" on the first matching
#                            line, or "0" when the name is missing or its
#                            value is empty
#
# The name is compared as a literal string, so the dots in it only match
# dots, and the statefile path may contain whitespace.
get_value ( ) {
        value=$(awk -v key="$1=" '
                index($0, key) == 1 {
                        print substr($0, length(key) + 1)
                        exit
                }' "$state")
        if test -z "$value"; then
                value="0"
        fi
}
123
# Download the statistics from the unbound server into the statefile.
#
# Globals read:    state, lock, lee, ctrl, conf
# Globals written: pid, i, now, value, state_tmp
# Calls:           get_value
# Returns normally when the statefile is fresh or has been refreshed.
# Exits the script with status 1 when the lock cannot be obtained or when
# unbound-control fails; in the latter case the previous statefile is left
# untouched.
get_state ( ) {
        # obtain lock for fetching the state
        # because there is a race condition in fetching and writing to file

        # see if the lock is stale (its owner no longer runs); if so, take it
        if test -f "$lock"; then
                pid=$(cat "$lock" 2>&1)
                if ! kill -0 "$pid" >/dev/null 2>&1 && test "$pid" != "$$"; then
                        echo $$ >"$lock"
                fi
        fi

        i=0
        # loop until the lock file exists and carries our own pid
        while test ! -f "$lock" || test "$(cat "$lock" 2>&1)" != "$$"; do
                while test -f "$lock"; do
                        # wait: the first 1000 rounds poll without pause,
                        # the following 500 rounds sleep one second each
                        i=$(expr "$i" + 1)
                        if test "$i" -gt 1000; then
                                sleep 1
                        fi
                        if test "$i" -gt 1500; then
                                echo "error locking $lock" "=" $(cat "$lock")
                                rm -f "$lock"
                                exit 1
                        fi
                done
                # try to get it
                echo $$ >"$lock"
        done

        # do not refetch if the file exists and is less than $lee seconds old
        if test -f "$state"; then
                now=$(date +%s)
                get_value "time.now"
                # drop the fractional part of the timestamp
                value=$(echo "$value" | sed -e 's/\..*$//')
                if test "$now" -lt "$(expr "$value" + "$lee")"; then
                        rm -f "$lock"
                        return
                fi
        fi

        # Fetch into a scratch file and rename it over the statefile only on
        # success, so a failed fetch cannot replace good data with a
        # truncated file.
        # $ctrl is left unquoted so that a configured command consisting of
        # several words keeps working.
        state_tmp="$state.tmp.$$"
        if $ctrl -c "$conf" stats >"$state_tmp" && mv -f "$state_tmp" "$state"; then
                rm -f "$lock"
                return
        fi
        rm -f "$state_tmp"
        echo "error retrieving data from unbound server"
        rm -f "$lock"
        exit 1
}
173
# "autoconf" request: report whether the plugin can be used on this host.
# The verdict is the printed "yes" / "no (reason)" line.  The exit status is
# 0 for both answers, so that a non-zero status stays reserved for a
# malfunctioning plugin.
if test "$1" = "autoconf" ; then
        if test ! -f "$conf"; then
                echo no "($conf does not exist)"
                exit 0
        fi
        statedir=$(dirname "$state")
        if test ! -d "$statedir"; then
                echo no "($statedir directory does not exist)"
                exit 0
        fi
        echo yes
        exit 0
fi
186
# "suggest" request: list, one per line, the graph names this script can
# be linked as (the part after "unbound_munin_").
case "$1" in
suggest)
        for graph in hits queue memory by_type by_class by_opcode \
                by_rcode by_flags histogram; do
                echo "$graph"
        done
        exit 0
        ;;
esac
199
# Determine which graph to produce from the name the script was invoked
# under: everything after the last "unbound_munin_" in $0.  The expansion
# happens inside the shell, so the name is neither word-split nor
# glob-expanded on the way.
id=${0##*unbound_munin_}
if test -z "$id"; then
        # some default to keep people sane.
        id="hits"
fi
206
# Print the munin config of statistic $1 (label, min 0, type ABSOLUTE), but
# only when the statefile contains a line for it; otherwise print nothing.
#
# Arguments: $1 - statistic name in unbound format, e.g. thread0.num.queries
#            $2 - label to show in the graph
# Globals:   state, ABBREV (read); mn (write) - abbreviated munin field name
exist_config ( ) {
        # $ABBREV stays unquoted: it is a list of separate sed arguments
        mn=$(echo "$1" | sed $ABBREV | tr . _)
        # quoted path, so a statefile location with whitespace still works
        if grep "^$1=" "$state" >/dev/null 2>&1; then
                echo "$mn.label $2"
                echo "$mn.min 0"
                echo "$mn.type ABSOLUTE"
        fi
}
216
# Unconditionally print the munin config of one field.
#   $1 - statistic name in unbound format; abbreviated with $ABBREV and
#        with dots turned into underscores to form the munin field name
#   $2 - label of the field
#   $3 - munin data type of the field (callers pass ABSOLUTE or GAUGE)
# The field always gets "min 0".  Leaves the field name in $mn.
p_config ( ) {
        mn=$(echo $1 | sed $ABBREV | tr . _)
        echo ${mn}.label "$2"
        echo ${mn}.min 0
        echo ${mn}.type $3
}
224
# Print the config (as ABSOLUTE fields) of every statistic in the statefile
# whose line starts with prefix $1.  The text after "<prefix>." is used as
# the label.
#   $1 - statistic name prefix, e.g. num.query.type
p_config_by_prefix ( ) {
        grep "^$1" "$state" | while IFS= read -r line; do
                nm=${line%%=*}
                p_config "$nm" "${nm#"$1".}" "ABSOLUTE"
        done
}

# Print the config of one series of the reply time histogram.
#   $1 - munin field name   $2 - label
#   $3 - draw style         $4 - colour (hex RRGGBB)
p_histogram ( ) {
        echo "$1.label $2"
        echo "$1.min 0"
        echo "$1.type ABSOLUTE"
        echo "$1.draw $3"
        echo "$1.colour $4"
}

# "config" request: describe the graph selected by $id, then exit 0.
# An $id that matches none of the cases prints nothing.
if test "$1" = "config" ; then
        # the set of fields depends on what the server reports, so make
        # sure there is a statefile to look at
        if test ! -f "$state"; then
                get_state
        fi
        case "$id" in
        hits)
                echo "graph_title Unbound DNS traffic and cache hits"
                echo "graph_args --base 1000 -l 0"
                echo "graph_vlabel queries / \${graph_period}"
                echo "graph_scale no"
                echo "graph_category DNS"
                # one field per worker thread found in the statefile
                grep "^thread[0-9][0-9]*\.num\.queries=" "$state" |
                sed -e 's/=.*//' |
                while IFS= read -r x; do
                        exist_config "$x" "queries handled by ${x%.num.queries}"
                done
                p_config "total.num.queries" "total queries from clients" "ABSOLUTE"
                p_config "total.num.cachehits" "cache hits" "ABSOLUTE"
                p_config "total.num.prefetch" "cache prefetch" "ABSOLUTE"
                p_config "num.query.tcp" "TCP queries" "ABSOLUTE"
                p_config "num.query.tcpout" "TCP out queries" "ABSOLUTE"
                p_config "num.query.ipv6" "IPv6 queries" "ABSOLUTE"
                p_config "unwanted.queries" "queries that failed acl" "ABSOLUTE"
                p_config "unwanted.replies" "unwanted or unsolicited replies" "ABSOLUTE"
                # u_replies is the abbreviated field name of unwanted.replies
                echo "u_replies.warning $warn"
                echo "u_replies.critical $crit"
                echo "graph_info DNS queries to the recursive resolver. The unwanted replies could be innocent duplicate packets, late replies, or spoof threats."
                ;;
        queue)
                echo "graph_title Unbound requestlist size"
                echo "graph_args --base 1000 -l 0"
                echo "graph_vlabel number of queries"
                echo "graph_scale no"
                echo "graph_category DNS"
                p_config "total.requestlist.avg" "Average size of queue on insert" "GAUGE"
                p_config "total.requestlist.max" "Max size of queue (in 5 min)" "GAUGE"
                p_config "total.requestlist.overwritten" "Number of queries replaced by new ones" "GAUGE"
                p_config "total.requestlist.exceeded" "Number of queries dropped due to lack of space" "GAUGE"
                echo "graph_info The queries that did not hit the cache and need recursion service take up space in the requestlist. If there are too many queries, first queries get overwritten, and at last resort dropped."
                ;;
        memory)
                echo "graph_title Unbound memory usage"
                echo "graph_args --base 1024 -l 0"
                echo "graph_vlabel memory used in bytes"
                echo "graph_category DNS"
                p_config "mem.total.sbrk" "Total memory" "GAUGE"
                p_config "mem.cache.rrset" "RRset cache memory" "GAUGE"
                p_config "mem.cache.message" "Message cache memory" "GAUGE"
                p_config "mem.mod.iterator" "Iterator module memory" "GAUGE"
                p_config "mem.mod.validator" "Validator module and key cache memory" "GAUGE"
                p_config "msg.cache.count" "msg cache count" "GAUGE"
                p_config "rrset.cache.count" "rrset cache count" "GAUGE"
                p_config "infra.cache.count" "infra cache count" "GAUGE"
                p_config "key.cache.count" "key cache count" "GAUGE"
                echo "graph_info The memory used by unbound."
                ;;
        by_type)
                echo "graph_title Unbound DNS queries by type"
                echo "graph_args --base 1000 -l 0"
                echo "graph_vlabel queries / \${graph_period}"
                echo "graph_scale no"
                echo "graph_category DNS"
                p_config_by_prefix "num.query.type"
                echo "graph_info queries by DNS RR type queried for"
                ;;
        by_class)
                echo "graph_title Unbound DNS queries by class"
                echo "graph_args --base 1000 -l 0"
                echo "graph_vlabel queries / \${graph_period}"
                echo "graph_scale no"
                echo "graph_category DNS"
                p_config_by_prefix "num.query.class"
                echo "graph_info queries by DNS RR class queried for."
                ;;
        by_opcode)
                echo "graph_title Unbound DNS queries by opcode"
                echo "graph_args --base 1000 -l 0"
                echo "graph_vlabel queries / \${graph_period}"
                echo "graph_scale no"
                echo "graph_category DNS"
                p_config_by_prefix "num.query.opcode"
                echo "graph_info queries by opcode in the query packet."
                ;;
        by_rcode)
                echo "graph_title Unbound DNS answers by return code"
                echo "graph_args --base 1000 -l 0"
                echo "graph_vlabel answer packets / \${graph_period}"
                echo "graph_scale no"
                echo "graph_category DNS"
                p_config_by_prefix "num.answer.rcode"
                p_config "num.answer.secure" "answer secure" "ABSOLUTE"
                p_config "num.answer.bogus" "answer bogus" "ABSOLUTE"
                p_config "num.rrset.bogus" "num rrsets marked bogus" "ABSOLUTE"
                echo "graph_info answers sorted by return value. rrsets bogus is the number of rrsets marked bogus per \${graph_period} by the validator"
                ;;
        by_flags)
                echo "graph_title Unbound DNS incoming queries by flags"
                echo "graph_args --base 1000 -l 0"
                echo "graph_vlabel queries / \${graph_period}"
                echo "graph_scale no"
                echo "graph_category DNS"
                p_config "num.query.flags.QR" "QR (query reply) flag" "ABSOLUTE"
                p_config "num.query.flags.AA" "AA (auth answer) flag" "ABSOLUTE"
                p_config "num.query.flags.TC" "TC (truncated) flag" "ABSOLUTE"
                p_config "num.query.flags.RD" "RD (recursion desired) flag" "ABSOLUTE"
                p_config "num.query.flags.RA" "RA (rec avail) flag" "ABSOLUTE"
                p_config "num.query.flags.Z" "Z (zero) flag" "ABSOLUTE"
                p_config "num.query.flags.AD" "AD (auth data) flag" "ABSOLUTE"
                p_config "num.query.flags.CD" "CD (check disabled) flag" "ABSOLUTE"
                p_config "num.query.edns.present" "EDNS OPT present" "ABSOLUTE"
                p_config "num.query.edns.DO" "DO (DNSSEC OK) flag" "ABSOLUTE"
                echo "graph_info This graphs plots the flags inside incoming queries. For example, if QR, AA, TC, RA, Z flags are set, the query can be rejected. RD, AD, CD and DO are legitimately set by some software."
                ;;
        histogram)
                echo "graph_title Unbound DNS histogram of reply time"
                echo "graph_args --base 1000 -l 0"
                echo "graph_vlabel queries / \${graph_period}"
                echo "graph_scale no"
                echo "graph_category DNS"
                # cache hits form the base area; the reply time buckets are
                # stacked on top of it, coloured from blue to red
                p_histogram hcache "cache hits" AREA 999999
                p_histogram h64ms "0 msec - 66 msec" STACK 0000FF
                p_histogram h128ms "66 msec - 131 msec" STACK 1F00DF
                p_histogram h256ms "131 msec - 262 msec" STACK 3F00BF
                p_histogram h512ms "262 msec - 524 msec" STACK 5F009F
                p_histogram h1s "524 msec - 1 sec" STACK 7F007F
                p_histogram h2s "1 sec - 2 sec" STACK 9F005F
                p_histogram h4s "2 sec - 4 sec" STACK BF003F
                p_histogram h8s "4 sec - 8 sec" STACK DF001F
                p_histogram h16s "8 sec - ..." STACK FF0000
                echo "graph_info Histogram of the reply times for queries."
                ;;
        esac

        exit 0
fi
415
# From here on the values themselves are reported: refresh the statefile.
get_state

# Read the elapsed time of the statistics.  A value of 0 (also what
# get_value reports for a missing entry) means there is nothing to report.
get_value "time.elapsed"
case "$value" in
0|0.000000)
        echo "error: time elapsed 0 or could not retrieve data"
        exit 1
        ;;
esac
elapsed="$value"
426
# Print "<field>.value <value>" for statistic $1, where <field> is the
# abbreviated munin name of $1 and <value> is what get_value finds for it.
# Leaves the field name in $mn and the value in $value.
print_value ( ) {
        get_value $1
        mn=$(echo $1 | sed $ABBREV | tr . _)
        echo "${mn}.value" $value
}
433
# Print "<field>.value <value>" for a statefile line that is already at hand.
#   $1 - statistic name; abbreviated into the munin field name
#   $2 - the complete "name=value" line; everything after its last "=" is
#        taken as the value
# Leaves the field name in $mn and the value in $value.
print_value_line ( ) {
        value=${2##*=}
        mn=$(echo $1 | sed $ABBREV | tr . _)
        echo "${mn}.value" $value
}
440
441
# Print the value of every statistic in the statefile whose line starts
# with prefix $1.
print_values_by_prefix ( ) {
        grep "^$1" "$state" | while IFS= read -r line; do
                print_value_line "${line%%=*}" "$line"
        done
}

# Add up the statefile values of all statistics named as arguments and
# leave the total in $r (a statistic missing from the statefile counts as 0).
sum_values ( ) {
        r=0
        for x in "$@"; do
                get_value "$x"
                r=$(expr "$r" + "$value")
        done
}

# Report the values of the graph selected by $id.
case "$id" in
hits)
        # per-thread counters found in the statefile, then the fixed ones;
        # only statistics present in the statefile are printed
        for x in $(grep "^thread[0-9][0-9]*\.num\.queries=" "$state" |
                sed -e 's/=.*//') total.num.queries \
                total.num.cachehits total.num.prefetch num.query.tcp \
                num.query.tcpout num.query.ipv6 unwanted.queries \
                unwanted.replies; do
                if grep "^$x=" "$state" >/dev/null 2>&1; then
                        print_value "$x"
                fi
        done
        ;;
queue)
        for x in total.requestlist.avg total.requestlist.max \
                total.requestlist.overwritten total.requestlist.exceeded; do
                print_value "$x"
        done
        ;;
memory)
        mn=$(echo mem.total.sbrk | sed $ABBREV | tr . _)
        get_value 'mem.total.sbrk'
        if test "$value" -eq 0; then
                # No sbrk figure in the statefile: fall back to the resident
                # set size of the process named in unbound's pidfile.  The
                # pidfile path is asked from the -checkconf tool that sits
                # next to the configured -control tool.
                chk=$(echo $ctrl | sed -e 's/-control$/-checkconf/')
                pidf=$($chk -o pidfile "$conf" 2>&1)
                pid=$(cat "$pidf" 2>&1)
                value=$(ps -p "$pid" -o rss= 2>&1)
                # accept the ps output only if it is a plain number
                if test "$(expr $value + 1 - 1 2>&1)" -eq "$value" 2>&1; then
                        # presumably ps reports kilobytes here, the graph
                        # is in bytes
                        value=$(expr $value \* 1024)
                else
                        value=0
                fi
        fi
        echo "$mn.value" $value
        for x in mem.cache.rrset mem.cache.message mem.mod.iterator \
                mem.mod.validator msg.cache.count rrset.cache.count \
                infra.cache.count key.cache.count; do
                print_value "$x"
        done
        ;;
by_type)
        print_values_by_prefix "num.query.type"
        ;;
by_class)
        print_values_by_prefix "num.query.class"
        ;;
by_opcode)
        print_values_by_prefix "num.query.opcode"
        ;;
by_rcode)
        print_values_by_prefix "num.answer.rcode"
        print_value "num.answer.secure"
        print_value "num.answer.bogus"
        print_value "num.rrset.bogus"
        ;;
by_flags)
        for x in num.query.flags.QR num.query.flags.AA num.query.flags.TC \
                num.query.flags.RD num.query.flags.RA num.query.flags.Z \
                num.query.flags.AD num.query.flags.CD \
                num.query.edns.present num.query.edns.DO; do
                print_value "$x"
        done
        ;;
histogram)
        get_value total.num.cachehits
        echo hcache.value $value
        # all buckets below 65536 usec are merged into one series
        sum_values histogram.000000.000000.to.000000.000001 \
                histogram.000000.000001.to.000000.000002 \
                histogram.000000.000002.to.000000.000004 \
                histogram.000000.000004.to.000000.000008 \
                histogram.000000.000008.to.000000.000016 \
                histogram.000000.000016.to.000000.000032 \
                histogram.000000.000032.to.000000.000064 \
                histogram.000000.000064.to.000000.000128 \
                histogram.000000.000128.to.000000.000256 \
                histogram.000000.000256.to.000000.000512 \
                histogram.000000.000512.to.000000.001024 \
                histogram.000000.001024.to.000000.002048 \
                histogram.000000.002048.to.000000.004096 \
                histogram.000000.004096.to.000000.008192 \
                histogram.000000.008192.to.000000.016384 \
                histogram.000000.016384.to.000000.032768 \
                histogram.000000.032768.to.000000.065536
        echo h64ms.value $r
        get_value histogram.000000.065536.to.000000.131072
        echo h128ms.value $value
        get_value histogram.000000.131072.to.000000.262144
        echo h256ms.value $value
        get_value histogram.000000.262144.to.000000.524288
        echo h512ms.value $value
        get_value histogram.000000.524288.to.000001.000000
        echo h1s.value $value
        get_value histogram.000001.000000.to.000002.000000
        echo h2s.value $value
        get_value histogram.000002.000000.to.000004.000000
        echo h4s.value $value
        get_value histogram.000004.000000.to.000008.000000
        echo h8s.value $value
        # all buckets from 8 seconds upwards are merged into one series
        sum_values histogram.000008.000000.to.000016.000000 \
                histogram.000016.000000.to.000032.000000 \
                histogram.000032.000000.to.000064.000000 \
                histogram.000064.000000.to.000128.000000 \
                histogram.000128.000000.to.000256.000000 \
                histogram.000256.000000.to.000512.000000 \
                histogram.000512.000000.to.001024.000000 \
                histogram.001024.000000.to.002048.000000 \
                histogram.002048.000000.to.004096.000000 \
                histogram.004096.000000.to.008192.000000 \
                histogram.008192.000000.to.016384.000000 \
                histogram.016384.000000.to.032768.000000 \
                histogram.032768.000000.to.065536.000000 \
                histogram.065536.000000.to.131072.000000 \
                histogram.131072.000000.to.262144.000000 \
                histogram.262144.000000.to.524288.000000
        echo h16s.value $r
        ;;
esac