]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/unbound/contrib/unbound_munin_
MFV: xz 5.4.4.
[FreeBSD/FreeBSD.git] / contrib / unbound / contrib / unbound_munin_
1 #!/bin/sh
2 #
3 # plugin for munin to monitor usage of unbound servers.
4 # To install copy this to /usr/local/share/munin/plugins/unbound_munin_
5 # and use munin-node-configure (--suggest, --shell).
6 #
7 # (C) 2008 W.C.A. Wijngaards.  BSD Licensed.
8 #
9 # To install; enable statistics and unbound-control in unbound.conf
10 #       server:         extended-statistics: yes
11 #                       statistics-cumulative: no
12 #                       statistics-interval: 0
13 #       remote-control: control-enable: yes
14 # Run the command unbound-control-setup to generate the key files.
15 #
16 # Environment variables for this script
17 #       unbound_conf    - where the unbound.conf file is located.
18 #       unbound_control - where to find unbound-control executable.
19 #       spoof_warn      - what level to warn about spoofing
20 #       spoof_crit      - what level to crit about spoofing
21 #
22 # You can set them in your munin/plugin-conf.d/plugins.conf file
23 # with:
24 # [unbound*]
25 # user root
26 # env.unbound_conf /usr/local/etc/unbound/unbound.conf
27 # env.unbound_control /usr/local/sbin/unbound-control
28 # env.spoof_warn 1000
29 # env.spoof_crit 100000
30 #
31 # This plugin can create different graphs depending on what name
32 # you link it as (with ln -s) into the plugins directory
33 # You can link it multiple times.
34 # If you are only a casual user, the _hits and _by_type are most interesting,
35 # possibly followed by _by_rcode.
36 #
37 #       unbound_munin_hits      - base volume, cache hits, unwanted traffic
38 #       unbound_munin_queue     - to monitor the internal requestlist
39 #       unbound_munin_memory    - memory usage
40 #       unbound_munin_by_type   - incoming queries by type
41 #       unbound_munin_by_class  - incoming queries by class
42 #       unbound_munin_by_opcode - incoming queries by opcode
43 #       unbound_munin_by_rcode  - answers by rcode, validation status
44 #       unbound_munin_by_flags  - incoming queries by flags
45 #       unbound_munin_histogram - histogram of query resolving times
46 #
47 # Magic markers - optional - used by installation scripts and
48 # munin-config:  (originally contrib family but munin-node-configure ignores it)
49 #
50 #%# family=auto
51 #%# capabilities=autoconf suggest
52
53 # POD documentation
54 : <<=cut
55 =head1 NAME
56
57 unbound_munin_ - Munin plugin to monitor the Unbound DNS resolver.
58
59 =head1 APPLICABLE SYSTEMS
60
61 System with unbound daemon.
62
63 =head1 CONFIGURATION
64
65   [unbound*]
66   user root
67   env.unbound_conf /usr/local/etc/unbound/unbound.conf
68   env.unbound_control /usr/local/sbin/unbound-control
69   env.spoof_warn 1000
70   env.spoof_crit 100000
71
72 Use the .env settings to override the defaults.
73
74 =head1 USAGE
75
76 Can be used to present different graphs. Use ln -s for that name in
77 the plugins directory to enable the graph.
78 unbound_munin_hits      - base volume, cache hits, unwanted traffic
79 unbound_munin_queue     - to monitor the internal requestlist
80 unbound_munin_memory    - memory usage
81 unbound_munin_by_type   - incoming queries by type
82 unbound_munin_by_class  - incoming queries by class
83 unbound_munin_by_opcode - incoming queries by opcode
84 unbound_munin_by_rcode  - answers by rcode, validation status
85 unbound_munin_by_flags  - incoming queries by flags
86 unbound_munin_histogram - histogram of query resolving times
87
88 =head1 AUTHOR
89
90 Copyright 2008 W.C.A. Wijngaards
91
92 =head1 LICENSE
93
94 BSD
95
96 =cut
97
# Files kept in munin's plugin state directory: the most recent statistics
# dump, the accumulated list of counter names, and the lock file that
# serialises updates of the dump.
state="${MUNIN_PLUGSTATE}/unbound.state"
lock="${state}.lock"
seentags="${MUNIN_PLUGSTATE}/unbound-seentags.state"

# Settings that can be overridden from the plugin environment
# (env.unbound_conf, env.unbound_control, env.spoof_warn, env.spoof_crit).
conf="${unbound_conf:-/usr/local/etc/unbound/unbound.conf}"
ctrl="${unbound_control:-/usr/local/sbin/unbound-control}"
warn="${spoof_warn:-1000}"
crit="${spoof_crit:-100000}"

# Number of seconds between polling attempts.  The state file is reused for
# at least this long, so that multiple links of this script can share one
# fetch from the server.
lee=55

# sed arguments that shorten counter names to keep field names within 19
# characters.  This is expanded UNQUOTED on purpose: every word must become
# a separate argument to sed.
ABBREV="-e s/total/t/ -e s/thread/t/ -e s/num/n/ -e s/query/q/ -e s/answer/a/ -e s/unwanted/u/ -e s/requestlist/ql/ -e s/type/t/ -e s/class/c/ -e s/opcode/o/ -e s/rcode/r/ -e s/edns/e/ -e s/mem/m/ -e s/cache/c/ -e s/mod/m/"
113
# Look up counter $1 in the state file and store its value in the global
# return variable $value ("0" when the counter is absent or empty).
#
# The key is compared as a literal string against the start of each line,
# so the dots in counter names are not treated as regex wildcards, and only
# the first matching line is used.
get_value ( ) {
        value=$(awk -v key="$1=" \
                'index($0, key) == 1 { print substr($0, length(key) + 1); exit }' \
                "$state")
        if test -z "$value"; then
                value="0"
        fi
}
121
# Merge the counter names of the current state file into the seentags file,
# so that a query type/class/opcode/rcode that was seen once keeps its graph
# field even when later dumps no longer list it.  This is run while holding
# the lock, after the state file is updated.
#
# A small set of always-present names seeds the list.  The merged list is
# written to a temporary file next to the target and renamed into place, so
# readers that do not hold the lock never see a half-written file.
update_seentags() {
    seentags_new="${seentags}.$$"
    if {
        cat "${seentags}" 2> /dev/null
        printf '%s\n' \
            num.query.type.A \
            num.query.class.IN \
            num.query.opcode.QUERY \
            num.answer.rcode.NOERROR
        grep '^num' "${state}" | sed -e 's/=.*//'
    } | grep -v '^$' | sort -u > "${seentags_new}"; then
        mv -f "${seentags_new}" "${seentags}"
    else
        rm -f "${seentags_new}"
    fi
}
133
# Download the statistics from the unbound server into $state.
#
# Several links of this plugin may run at the same time, and fetching and
# writing the file is racy, so the work is serialised through the lock file
# $lock, which holds the pid of its owner.
#
# Reads:  $lock $state $ctrl $conf $lee; calls get_value and update_seentags.
# Result: returns with a state file that is at most $lee seconds old, or
#         terminates the plugin with exit status 1 when the lock cannot be
#         obtained or unbound-control fails.  The output of unbound-control
#         is collected in a temporary file and renamed over $state only on
#         success, so a failed fetch leaves the previous state untouched.
get_state ( ) {
        # see if the lock is stale (owner no longer running), if so, take it
        if test -f "$lock" ; then
                pid=$(cat "$lock" 2>/dev/null)
                if test "$pid" != "$$" && ! kill -0 "$pid" >/dev/null 2>&1; then
                        echo $$ >"$lock"
                fi
        fi

        i=0
        # loop until the lock file exists and contains our own pid
        while test ! -f "$lock" || test "$(cat "$lock" 2>/dev/null)" != "$$"; do
                while test -f "$lock"; do
                        # wait: the first 1000 rounds spin, after that each
                        # round sleeps a second; give up after 1500 rounds.
                        i=$(expr $i + 1)
                        if test $i -gt 1000; then
                                sleep 1;
                        fi
                        if test $i -gt 1500; then
                                echo "error locking $lock" "=" $(cat "$lock")
                                rm -f "$lock"
                                exit 1
                        fi
                done
                # try to get it; when the lock file cannot be written at all,
                # carry on without a lock rather than looping forever
                if echo $$ >"$lock" ; then : ; else break; fi
        done
        # do not refetch if the file exists and only LEE seconds old
        if test -f "$state"; then
                now=$(date +%s)
                get_value "time.now"
                # time.now carries a fractional part; keep whole seconds
                value=${value%%.*}
                if test "$now" -lt "$(expr "$value" + "$lee")"; then
                        rm -f "$lock"
                        return
                fi
        fi
        tmpstate="$state.$$"
        if "$ctrl" -c "$conf" stats > "$tmpstate" && mv -f "$tmpstate" "$state"; then
                update_seentags
                rm -f "$lock"
        else
                rm -f "$tmpstate"
                echo "error retrieving data from unbound server"
                rm -f "$lock"
                exit 1
        fi
}
184
# "autoconf": tell munin-node-configure whether this plugin can work here.
# Answers "yes" only when the unbound configuration file exists and the
# directory that will hold the state file exists; otherwise "no (reason)".
# Always exits 0.  Paths are quoted so that a path containing
# whitespace is tested as one name instead of making `test` fail.
if test "$1" = "autoconf" ; then
        if test ! -f "$conf"; then
                echo no "($conf does not exist)"
                exit 0
        fi
        statedir=$(dirname "$state")
        if test ! -d "$statedir"; then
                echo no "($statedir directory does not exist)"
                exit 0
        fi
        echo yes
        exit 0
fi
197
# "suggest": list the graph names this plugin can be linked as
# (unbound_munin_<name>), one per line, then exit.
if test "$1" = "suggest" ; then
        for graph in hits queue memory by_type by_class by_opcode \
                by_rcode by_flags histogram; do
                echo "$graph"
        done
        exit 0
fi
210
# Determine which graph to produce from the name this script was invoked
# as: everything after the last "unbound_munin_" in $0.
id=${0##*unbound_munin_}
# Invoked through the bare name "unbound_munin_": fall back to a sane
# default graph.
test -n "$id" || id="hits"
217
# If counter $1 exists in the state file, print its munin field
# configuration (label $2, minimum 0, type ABSOLUTE); print nothing
# otherwise.
#
# The counter name is matched as a literal "name=" prefix of a line, so the
# dots in it cannot match arbitrary characters.
exist_config ( ) {
        if awk -v key="$1=" \
                'index($0, key) == 1 { found = 1; exit } END { exit !found }' \
                "$state" >/dev/null 2>&1; then
                # $ABBREV is a list of sed arguments and must stay unquoted
                mn=$(echo "$1" | sed $ABBREV | tr . _)
                echo "$mn.label $2"
                echo "$mn.min 0"
                echo "$mn.type ABSOLUTE"
        fi
}
227
# Print the munin field configuration for counter $1 (a name in unbound
# format): label $2, minimum 0 and data type $3.  The field name is the
# abbreviated counter name with dots turned into underscores.
p_config ( ) {
        # $ABBREV is a list of sed arguments and must stay unquoted
        field=$(echo $1 | sed $ABBREV | tr . _)
        printf '%s.label %s\n%s.min 0\n%s.type %s\n' \
                "$field" "$2" "$field" "$field" "$3"
}
235
# "config": print the munin graph and field configuration for graph $id,
# then exit 0.  An unknown $id prints nothing.
#
# The per-thread fields of the hits graph come from the state file and the
# fields of the by_* graphs from the seentags file, so the state is fetched
# once when no state file exists yet.
if test "$1" = "config" ; then
        if test ! -f "$state"; then
                get_state
        fi

        # graph attributes shared by every graph that counts queries:
        # $1 is the title, $2 the vertical axis label
        cfg_header ( ) {
                echo "graph_title $1"
                echo "graph_args --base 1000 -l 0"
                echo "graph_vlabel $2"
                echo "graph_scale no"
                echo "graph_category dns"
        }

        # one ABSOLUTE field for every name in the seentags file that starts
        # with the literal prefix "$1."; the label is the rest of the name
        cfg_seen ( ) {
                for nm in $(awk -v p="$1." 'index($0, p) == 1' "$seentags"); do
                        tp=${nm#"$1".}
                        p_config "$nm" "$tp" "ABSOLUTE"
                done
        }

        # one histogram bucket: field $1, label $2, draw style $3, colour $4
        cfg_hist ( ) {
                echo "$1.label $2"
                echo "$1.min 0"
                echo "$1.type ABSOLUTE"
                echo "$1.draw $3"
                echo "$1.colour $4"
        }

        case "$id" in
        hits)
                cfg_header "Unbound DNS traffic and cache hits" 'queries / ${graph_period}'
                # one field per worker thread present in the state file
                for x in $(grep '^thread[0-9][0-9]*\.num\.queries=' "$state" |
                        sed -e 's/=.*//'); do
                        exist_config "$x" "queries handled by ${x%.num.queries}"
                done
                p_config "total.num.queries" "total queries from clients" "ABSOLUTE"
                p_config "total.num.cachehits" "cache hits" "ABSOLUTE"
                p_config "total.num.prefetch" "cache prefetch" "ABSOLUTE"
                p_config "num.query.tcp" "TCP queries" "ABSOLUTE"
                p_config "num.query.tcpout" "TCP out queries" "ABSOLUTE"
                p_config "num.query.udpout" "UDP out queries" "ABSOLUTE"
                p_config "num.query.tls" "TLS queries" "ABSOLUTE"
                p_config "num.query.tls.resume" "TLS resumes" "ABSOLUTE"
                p_config "num.query.ipv6" "IPv6 queries" "ABSOLUTE"
                p_config "unwanted.queries" "queries that failed acl" "ABSOLUTE"
                p_config "unwanted.replies" "unwanted or unsolicited replies" "ABSOLUTE"
                # u_replies is the abbreviated field name of unwanted.replies
                echo "u_replies.warning $warn"
                echo "u_replies.critical $crit"
                echo "graph_info DNS queries to the recursive resolver. The unwanted replies could be innocent duplicate packets, late replies, or spoof threats."
                ;;
        queue)
                cfg_header "Unbound requestlist size" "number of queries"
                p_config "total.requestlist.avg" "Average size of queue on insert" "GAUGE"
                p_config "total.requestlist.max" "Max size of queue (in 5 min)" "GAUGE"
                p_config "total.requestlist.overwritten" "Number of queries replaced by new ones" "GAUGE"
                p_config "total.requestlist.exceeded" "Number of queries dropped due to lack of space" "GAUGE"
                echo "graph_info The queries that did not hit the cache and need recursion service take up space in the requestlist. If there are too many queries, first queries get overwritten, and at last resort dropped."
                ;;
        memory)
                # differs from the common header: base 1024, no graph_scale
                echo "graph_title Unbound memory usage"
                echo "graph_args --base 1024 -l 0"
                echo "graph_vlabel memory used in bytes"
                echo "graph_category dns"
                p_config "mem.cache.rrset" "RRset cache memory" "GAUGE"
                p_config "mem.cache.message" "Message cache memory" "GAUGE"
                p_config "mem.mod.iterator" "Iterator module memory" "GAUGE"
                p_config "mem.mod.validator" "Validator module and key cache memory" "GAUGE"
                p_config "msg.cache.count" "msg cache count" "GAUGE"
                p_config "rrset.cache.count" "rrset cache count" "GAUGE"
                p_config "infra.cache.count" "infra cache count" "GAUGE"
                p_config "key.cache.count" "key cache count" "GAUGE"
                echo "graph_info The memory used by unbound."
                ;;
        by_type)
                cfg_header "Unbound DNS queries by type" 'queries / ${graph_period}'
                cfg_seen "num.query.type"
                echo "graph_info queries by DNS RR type queried for"
                ;;
        by_class)
                cfg_header "Unbound DNS queries by class" 'queries / ${graph_period}'
                cfg_seen "num.query.class"
                echo "graph_info queries by DNS RR class queried for."
                ;;
        by_opcode)
                cfg_header "Unbound DNS queries by opcode" 'queries / ${graph_period}'
                cfg_seen "num.query.opcode"
                echo "graph_info queries by opcode in the query packet."
                ;;
        by_rcode)
                cfg_header "Unbound DNS answers by return code" 'answer packets / ${graph_period}'
                cfg_seen "num.answer.rcode"
                p_config "num.answer.secure" "answer secure" "ABSOLUTE"
                p_config "num.answer.bogus" "answer bogus" "ABSOLUTE"
                p_config "num.rrset.bogus" "num rrsets marked bogus" "ABSOLUTE"
                echo "graph_info answers sorted by return value. rrsets bogus is the number of rrsets marked bogus per \${graph_period} by the validator"
                ;;
        by_flags)
                cfg_header "Unbound DNS incoming queries by flags" 'queries / ${graph_period}'
                p_config "num.query.flags.QR" "QR (query reply) flag" "ABSOLUTE"
                p_config "num.query.flags.AA" "AA (auth answer) flag" "ABSOLUTE"
                p_config "num.query.flags.TC" "TC (truncated) flag" "ABSOLUTE"
                p_config "num.query.flags.RD" "RD (recursion desired) flag" "ABSOLUTE"
                p_config "num.query.flags.RA" "RA (rec avail) flag" "ABSOLUTE"
                p_config "num.query.flags.Z" "Z (zero) flag" "ABSOLUTE"
                p_config "num.query.flags.AD" "AD (auth data) flag" "ABSOLUTE"
                p_config "num.query.flags.CD" "CD (check disabled) flag" "ABSOLUTE"
                p_config "num.query.edns.present" "EDNS OPT present" "ABSOLUTE"
                p_config "num.query.edns.DO" "DO (DNSSEC OK) flag" "ABSOLUTE"
                echo "graph_info This graphs plots the flags inside incoming queries. For example, if QR, AA, TC, RA, Z flags are set, the query can be rejected. RD, AD, CD and DO are legitimately set by some software."
                ;;
        histogram)
                cfg_header "Unbound DNS histogram of reply time" 'queries / ${graph_period}'
                # cache hits form the base area, the reply-time buckets are
                # stacked on top of it, coloured from blue to red
                cfg_hist hcache "cache hits" AREA 999999
                cfg_hist h64ms "0 msec - 66 msec" STACK 0000FF
                cfg_hist h128ms "66 msec - 131 msec" STACK 1F00DF
                cfg_hist h256ms "131 msec - 262 msec" STACK 3F00BF
                cfg_hist h512ms "262 msec - 524 msec" STACK 5F009F
                cfg_hist h1s "524 msec - 1 sec" STACK 7F007F
                cfg_hist h2s "1 sec - 2 sec" STACK 9F005F
                cfg_hist h4s "2 sec - 4 sec" STACK BF003F
                cfg_hist h8s "4 sec - 8 sec" STACK DF001F
                cfg_hist h16s "8 sec - ..." STACK FF0000
                echo "graph_info Histogram of the reply times for queries."
                ;;
        esac

        exit 0
fi
424
# Fetch mode: make sure a current state file is available.
get_state

# The elapsed time doubles as a sanity check: a value of zero (also the
# fallback for a missing counter) means there is nothing usable to report.
get_value "time.elapsed"
case "$value" in
0|0.000000)
        echo "error: time elapsed 0 or could not retrieve data"
        exit 1
        ;;
esac
elapsed="$value"
435
# Print the munin "<field>.value <number>" line for counter $1, reading the
# number from the state file through get_value.
print_value ( ) {
        get_value $1
        # $ABBREV is a list of sed arguments and must stay unquoted
        field=$(echo $1 | sed $ABBREV | tr . _)
        echo "${field}.value" $value
}
442
# Print the munin "<field>.value <number>" line for counter $1 when its
# "name=value" line has already been found and is passed as $2.
print_value_line ( ) {
        # everything after the last "=" is the value
        value=${2##*=}
        # $ABBREV is a list of sed arguments and must stay unquoted
        field=$(echo $1 | sed $ABBREV | tr . _)
        echo "${field}.value" $value
}
449
450
451 case $id in
452 hits)
453         for x in `grep "^thread[0-9][0-9]*\.num\.queries=" $state |
454                 sed -e 's/=.*//'` total.num.queries \
455                 total.num.cachehits total.num.prefetch num.query.tcp \
456                 num.query.tcpout num.query.udpout num.query.tls num.query.tls.resume \
457                 num.query.ipv6 unwanted.queries \
458                 unwanted.replies; do
459                 if grep "^"$x"=" $state >/dev/null 2>&1; then
460                         print_value $x
461                 fi
462         done
463         ;;
464 queue)
465         for x in total.requestlist.avg total.requestlist.max \
466                 total.requestlist.overwritten total.requestlist.exceeded; do
467                 print_value $x
468         done
469         ;;
470 memory)
471         for x in mem.cache.rrset mem.cache.message mem.mod.iterator \
472                 mem.mod.validator msg.cache.count rrset.cache.count \
473                 infra.cache.count key.cache.count; do
474                 print_value $x
475         done
476         ;;
477 by_type)
478         for nm in `grep "^num.query.type" $seentags`; do
479                 print_value $nm
480         done
481         ;;
482 by_class)
483         for nm in `grep "^num.query.class" $seentags`; do
484                 print_value $nm
485         done
486         ;;
487 by_opcode)
488         for nm in `grep "^num.query.opcode" $seentags`; do
489                 print_value $nm
490         done
491         ;;
492 by_rcode)
493         for nm in `grep "^num.answer.rcode" $seentags`; do
494                 print_value $nm
495         done
496         print_value "num.answer.secure"
497         print_value "num.answer.bogus"
498         print_value "num.rrset.bogus"
499         ;;
500 by_flags)
501         for x in num.query.flags.QR num.query.flags.AA num.query.flags.TC num.query.flags.RD num.query.flags.RA num.query.flags.Z num.query.flags.AD num.query.flags.CD num.query.edns.present num.query.edns.DO; do
502                 print_value $x
503         done
504         ;;
505 histogram)
506         get_value total.num.cachehits
507         echo hcache.value $value
508         r=0
509         for x in histogram.000000.000000.to.000000.000001 \
510                 histogram.000000.000001.to.000000.000002 \
511                 histogram.000000.000002.to.000000.000004 \
512                 histogram.000000.000004.to.000000.000008 \
513                 histogram.000000.000008.to.000000.000016 \
514                 histogram.000000.000016.to.000000.000032 \
515                 histogram.000000.000032.to.000000.000064 \
516                 histogram.000000.000064.to.000000.000128 \
517                 histogram.000000.000128.to.000000.000256 \
518                 histogram.000000.000256.to.000000.000512 \
519                 histogram.000000.000512.to.000000.001024 \
520                 histogram.000000.001024.to.000000.002048 \
521                 histogram.000000.002048.to.000000.004096 \
522                 histogram.000000.004096.to.000000.008192 \
523                 histogram.000000.008192.to.000000.016384 \
524                 histogram.000000.016384.to.000000.032768 \
525                 histogram.000000.032768.to.000000.065536; do
526                 get_value $x
527                 r=`expr $r + $value`
528         done
529         echo h64ms.value $r
530         get_value histogram.000000.065536.to.000000.131072
531         echo h128ms.value $value
532         get_value histogram.000000.131072.to.000000.262144
533         echo h256ms.value $value
534         get_value histogram.000000.262144.to.000000.524288
535         echo h512ms.value $value
536         get_value histogram.000000.524288.to.000001.000000
537         echo h1s.value $value
538         get_value histogram.000001.000000.to.000002.000000
539         echo h2s.value $value
540         get_value histogram.000002.000000.to.000004.000000
541         echo h4s.value $value
542         get_value histogram.000004.000000.to.000008.000000
543         echo h8s.value $value
544         r=0
545         for x in histogram.000008.000000.to.000016.000000 \
546                 histogram.000016.000000.to.000032.000000 \
547                 histogram.000032.000000.to.000064.000000 \
548                 histogram.000064.000000.to.000128.000000 \
549                 histogram.000128.000000.to.000256.000000 \
550                 histogram.000256.000000.to.000512.000000 \
551                 histogram.000512.000000.to.001024.000000 \
552                 histogram.001024.000000.to.002048.000000 \
553                 histogram.002048.000000.to.004096.000000 \
554                 histogram.004096.000000.to.008192.000000 \
555                 histogram.008192.000000.to.016384.000000 \
556                 histogram.016384.000000.to.032768.000000 \
557                 histogram.032768.000000.to.065536.000000 \
558                 histogram.065536.000000.to.131072.000000 \
559                 histogram.131072.000000.to.262144.000000 \
560                 histogram.262144.000000.to.524288.000000; do
561                 get_value $x
562                 r=`expr $r + $value`
563         done
564         echo h16s.value $r
565         ;;
566 esac