]> CyberLeo.Net >> Repos - FreeBSD/releng/10.0.git/blob - contrib/ofed/management/infiniband-diags/scripts/IBswcountlimits.pm
- Copy stable/10 (r259064) to releng/10.0 as part of the
[FreeBSD/releng/10.0.git] / contrib / ofed / management / infiniband-diags / scripts / IBswcountlimits.pm
1 #!/usr/bin/perl
2 #
3 # Copyright (c) 2006 The Regents of the University of California.
4 # Copyright (c) 2006-2008 Voltaire, Inc. All rights reserved.
5 #
6 # Produced at Lawrence Livermore National Laboratory.
7 # Written by Ira Weiny <weiny2@llnl.gov>.
8 #            Erez Strauss from Voltaire for help in the get_link_ends code.
9 #
10 # This software is available to you under a choice of one of two
11 # licenses.  You may choose to be licensed under the terms of the GNU
12 # General Public License (GPL) Version 2, available from the file
13 # COPYING in the main directory of this source tree, or the
14 # OpenIB.org BSD license below:
15 #
16 #     Redistribution and use in source and binary forms, with or
17 #     without modification, are permitted provided that the following
18 #     conditions are met:
19 #
20 #      - Redistributions of source code must retain the above
21 #        copyright notice, this list of conditions and the following
22 #        disclaimer.
23 #
24 #      - Redistributions in binary form must reproduce the above
25 #        copyright notice, this list of conditions and the following
26 #        disclaimer in the documentation and/or other materials
27 #        provided with the distribution.
28 #
29 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
30 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
32 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
33 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
34 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
35 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
36 # SOFTWARE.
37 #
38
39 use strict;
40
41 %IBswcountlimits::cur_counts      = ();
42 %IBswcountlimits::new_counts      = ();
43 @IBswcountlimits::suppress_errors = ();
44 $IBswcountlimits::link_ends       = undef;
45 $IBswcountlimits::pause_time      = 10;
46 $IBswcountlimits::cache_dir       = "/var/cache/infiniband-diags";
47
48 # all the PerfMgt counters
49 @IBswcountlimits::counters = (
50         "SymbolErrors",        "LinkRecovers",
51         "LinkDowned",          "RcvErrors",
52         "RcvRemotePhysErrors", "RcvSwRelayErrors",
53         "XmtDiscards",         "XmtConstraintErrors",
54         "RcvConstraintErrors", "LinkIntegrityErrors",
55         "ExcBufOverrunErrors", "VL15Dropped",
56         "XmtData",             "RcvData",
57         "XmtPkts",             "RcvPkts"
58 );
59
60 # non-critical counters
61 %IBswcountlimits::error_counters = (
62         "SymbolErrors",
63 "No action is required except if counter is increasing along with LinkRecovers",
64         "LinkRecovers",
65 "If this is increasing along with SymbolErrors this may indicate a bad link, run ibswportwatch.pl on this port",
66         "LinkDowned",
67         "Number of times the port has gone down (Usually for valid reasons)",
68         "RcvErrors",
69 "This is a bad link, if the link is internal to a 288 try setting SDR, otherwise check the cable",
70         "RcvRemotePhysErrors",
71         "This indicates a problem ELSEWHERE in the fabric.",
72         "XmtDiscards",
73 "This is a symptom of congestion and may require tweaking either HOQ or switch lifetime values",
74         "XmtConstraintErrors",
75         "This is a result of bad partitioning, check partition configuration.",
76         "RcvConstraintErrors",
77         "This is a result of bad partitioning, check partition configuration.",
78         "LinkIntegrityErrors",
79         "May indicate a bad link, run ibswportwatch.pl on this port",
80         "ExcBufOverrunErrors",
81 "This is a flow control state machine error and can be caused by packets with physical errors",
82         "VL15Dropped",
83         "check with ibswportwatch.pl, if increasing in SMALL increments, OK",
84         "RcvSwRelayErrors",
85         "This counter can increase due to a valid network event"
86 );
87
88 sub check_counters
89 {
90         my $print_action = $_[0];
91         my $actions      = undef;
92
93         COUNTER: foreach my $cnt (keys %IBswcountlimits::error_counters) {
94                 if ($IBswcountlimits::cur_counts{$cnt} > 0) {
95                         foreach my $sup_cnt (@IBswcountlimits::suppress_errors) {
96                                 if ("$cnt" eq $sup_cnt) { next COUNTER; }
97                         }
98                         print " [$cnt == $IBswcountlimits::cur_counts{$cnt}]";
99                         if ("$print_action" eq "yes") {
100                                 $actions = join " ",
101                                   (
102                                         $actions,
103                                         "         $cnt: $IBswcountlimits::error_counters{$cnt}\n"
104                                   );
105                         }
106                 }
107         }
108
109         if ($actions) {
110                 print "\n         Actions:\n$actions";
111         }
112 }
113
114 # Data counters
115 %IBswcountlimits::data_counters = (
116         "XmtData",
117 "Total number of data octets, divided by 4, transmitted on all VLs from the port",
118         "RcvData",
119 "Total number of data octets, divided by 4, received on all VLs to the port",
120         "XmtPkts",
121 "Total number of packets, excluding link packets, transmitted on all VLs from the port",
122         "RcvPkts",
123 "Total number of packets, excluding link packets, received on all VLs to the port"
124 );
125
126 sub check_data_counters
127 {
128         my $print_action = $_[0];
129         my $actions      = undef;
130
131         COUNTER: foreach my $cnt (keys %IBswcountlimits::data_counters) {
132                 print " [$cnt == $IBswcountlimits::cur_counts{$cnt}]";
133                 if ("$print_action" eq "yes") {
134                         $actions = join " ",
135                           (
136                                 $actions,
137                                 "         $cnt: $IBswcountlimits::data_counters{$cnt}\n"
138                           );
139                 }
140         }
141         if ($actions) {
142                 print "\n         Descriptions:\n$actions";
143         }
144 }
145
146 sub print_data_rates
147 {
148         COUNTER: foreach my $cnt (keys %IBswcountlimits::data_counters) {
149                 my $cnt_per_second = calculate_rate(
150                         $IBswcountlimits::cur_counts{$cnt},
151                         $IBswcountlimits::new_counts{$cnt}
152                 );
153                 print "   $cnt_per_second $cnt/second\n";
154         }
155 }
156
157 # =========================================================================
158 # Rate dependent counters
159 # calculate the count/sec
160 # calculate_rate old_count new_count
161 sub calculate_rate
162 {
163         my $rate    = 0;
164         my $old_val = $_[0];
165         my $new_val = $_[1];
166         my $rate    = ($new_val - $old_val) / $IBswcountlimits::pause_time;
167         return ($rate);
168 }
169 %IBswcountlimits::rate_dep_thresholds = (
170         "SymbolErrors", 10, "LinkRecovers",        10,
171         "RcvErrors",    10, "LinkIntegrityErrors", 10,
172         "XmtDiscards",  10
173 );
174
175 sub check_counter_rates
176 {
177         foreach my $rate_count (keys %IBswcountlimits::rate_dep_thresholds) {
178                 my $rate = calculate_rate(
179                         $IBswcountlimits::cur_counts{$rate_count},
180                         $IBswcountlimits::new_counts{$rate_count}
181                 );
182                 if ($rate > $IBswcountlimits::rate_dep_thresholds{$rate_count}) {
183                         print "Detected excessive rate for $rate_count ($rate cnts/sec)\n";
184                 } elsif ($rate > 0) {
185                         print "Detected rate for $rate_count ($rate cnts/sec)\n";
186                 }
187         }
188 }
189
190 # =========================================================================
191 #
192 sub clear_counters
193 {
194         # clear the counters
195         foreach my $count (@IBswcountlimits::counters) {
196                 $IBswcountlimits::cur_counts{$count} = 0;
197         }
198 }
199
200 # =========================================================================
201 #
202 sub any_counts
203 {
204         my $total = 0;
205         my $count = 0;
206         foreach $count (keys %IBswcountlimits::critical) {
207                 $total = $total + $IBswcountlimits::cur_counts{$count};
208         }
209         COUNTER: foreach $count (keys %IBswcountlimits::error_counters) {
210                 foreach my $sup_cnt (@IBswcountlimits::suppress_errors) {
211                         if ("$count" eq $sup_cnt) { next COUNTER; }
212                 }
213                 $total = $total + $IBswcountlimits::cur_counts{$count};
214         }
215         return ($total);
216 }
217
218 # =========================================================================
219 #
220 sub ensure_cache_dir
221 {
222         if (!(-d "$IBswcountlimits::cache_dir") &&
223             !mkdir($IBswcountlimits::cache_dir, 0700)) {
224                 die "cannot create $IBswcountlimits::cache_dir: $!\n";
225         }
226 }
227
228 # =========================================================================
229 # get_cache_file(ca_name, ca_port)
230 #
231 sub get_cache_file
232 {
233         my $ca_name = $_[0];
234         my $ca_port = $_[1];
235         ensure_cache_dir;
236         return (
237                 "$IBswcountlimits::cache_dir/ibnetdiscover-$ca_name-$ca_port.topology");
238 }
239
240 # =========================================================================
241 # get_ca_name_port_param_string(ca_name, ca_port)
242 #
243 sub get_ca_name_port_param_string
244 {
245         my $ca_name = $_[0];
246         my $ca_port = $_[1];
247
248         if ("$ca_name" ne "") { $ca_name = "-C $ca_name"; }
249         if ("$ca_port" ne "") { $ca_port = "-P $ca_port"; }
250
251         return ("$ca_name $ca_port");
252 }
253
254 # =========================================================================
255 # generate_ibnetdiscover_topology(ca_name, ca_port)
256 #
257 sub generate_ibnetdiscover_topology
258 {
259         my $ca_name      = $_[0];
260         my $ca_port      = $_[1];
261         my $cache_file   = get_cache_file($ca_name, $ca_port);
262         my $extra_params = get_ca_name_port_param_string($ca_name, $ca_port);
263
264         if (`ibnetdiscover -g $extra_params > $cache_file`) {
265                 die "Execution of ibnetdiscover failed: $!\n";
266         }
267 }
268
269 # =========================================================================
270 # get_link_ends(regenerate_map, ca_name, ca_port)
271 #
272 sub get_link_ends
273 {
274         my $regenerate_map = $_[0];
275         my $ca_name        = $_[1];
276         my $ca_port        = $_[2];
277
278         my $cache_file = get_cache_file($ca_name, $ca_port);
279
280         if ($regenerate_map || !(-f "$cache_file")) {
281                 generate_ibnetdiscover_topology($ca_name, $ca_port);
282         }
283         open IBNET_TOPO, "<$cache_file"
284           or die "Failed to open ibnet topology: $!\n";
285         my $in_switch  = "no";
286         my $desc       = "";
287         my $guid       = "";
288         my $loc_sw_lid = "";
289
290         my $loc_port = "";
291         my $line     = "";
292
293         while ($line = <IBNET_TOPO>) {
294                 if ($line =~ /^Switch.*\"S-(.*)\"\s+#.*\"(.*)\".* lid (\d+).*/) {
295                         $guid       = $1;
296                         $desc       = $2;
297                         $loc_sw_lid = $3;
298                         $in_switch  = "yes";
299                 }
300                 if ($in_switch eq "yes") {
301                         my $rec = undef;
302                         if ($line =~
303 /^\[(\d+)\]\s+\"[HSR]-(.+)\"\[(\d+)\](\(.+\))?\s+#.*\"(.*)\"\.* lid (\d+).*/
304                           )
305                         {
306                                 $loc_port = $1;
307                                 my $rem_guid      = $2;
308                                 my $rem_port      = $3;
309                                 my $rem_port_guid = $4;
310                                 my $rem_desc      = $5;
311                                 my $rem_lid       = $6;
312                                 $rec = {
313                                         loc_guid      => "0x$guid",
314                                         loc_port      => $loc_port,
315                                         loc_ext_port  => "",
316                                         loc_desc      => $desc,
317                                         loc_sw_lid    => $loc_sw_lid,
318                                         rem_guid      => "0x$rem_guid",
319                                         rem_lid       => $rem_lid,
320                                         rem_port      => $rem_port,
321                                         rem_ext_port  => "",
322                                         rem_desc      => $rem_desc,
323                                         rem_port_guid => $rem_port_guid
324                                 };
325                         }
326                         if ($line =~
327 /^\[(\d+)\]\[ext (\d+)\]\s+\"[HSR]-(.+)\"\[(\d+)\](\(.+\))?\s+#.*\"(.*)\"\.* lid (\d+).*/
328                           )
329                         {
330                                 $loc_port = $1;
331                                 my $loc_ext_port  = $2;
332                                 my $rem_guid      = $3;
333                                 my $rem_port      = $4;
334                                 my $rem_port_guid = $5;
335                                 my $rem_desc      = $6;
336                                 my $rem_lid       = $7;
337                                 $rec = {
338                                         loc_guid      => "0x$guid",
339                                         loc_port      => $loc_port,
340                                         loc_ext_port  => $loc_ext_port,
341                                         loc_desc      => $desc,
342                                         loc_sw_lid    => $loc_sw_lid,
343                                         rem_guid      => "0x$rem_guid",
344                                         rem_lid       => $rem_lid,
345                                         rem_port      => $rem_port,
346                                         rem_ext_port  => "",
347                                         rem_desc      => $rem_desc,
348                                         rem_port_guid => $rem_port_guid
349                                 };
350                         }
351                         if ($line =~
352 /^\[(\d+)\]\s+\"[HSR]-(.+)\"\[(\d+)\]\[ext (\d+)\](\(.+\))?\s+#.*\"(.*)\"\.* lid (\d+).*/
353                           )
354                         {
355                                 $loc_port = $1;
356                                 my $rem_guid      = $2;
357                                 my $rem_port      = $3;
358                                 my $rem_ext_port  = $4;
359                                 my $rem_port_guid = $5;
360                                 my $rem_desc      = $6;
361                                 my $rem_lid       = $7;
362                                 $rec = {
363                                         loc_guid      => "0x$guid",
364                                         loc_port      => $loc_port,
365                                         loc_ext_port  => "",
366                                         loc_desc      => $desc,
367                                         loc_sw_lid    => $loc_sw_lid,
368                                         rem_guid      => "0x$rem_guid",
369                                         rem_lid       => $rem_lid,
370                                         rem_port      => $rem_port,
371                                         rem_ext_port  => $rem_ext_port,
372                                         rem_desc      => $rem_desc,
373                                         rem_port_guid => $rem_port_guid
374                                 };
375                         }
376                         if ($line =~
377 /^\[(\d+)\]\[ext (\d+)\]\s+\"[HSR]-(.+)\"\[(\d+)\]\[ext (\d+)\](\(.+\))?\s+#.*\"(.*)\"\.* lid (\d+).*/
378                           )
379                         {
380                                 $loc_port = $1;
381                                 my $loc_ext_port  = $2;
382                                 my $rem_guid      = $3;
383                                 my $rem_port      = $4;
384                                 my $rem_ext_port  = $5;
385                                 my $rem_port_guid = $6;
386                                 my $rem_desc      = $7;
387                                 my $rem_lid       = $8;
388                                 $rec = {
389                                         loc_guid      => "0x$guid",
390                                         loc_port      => $loc_port,
391                                         loc_ext_port  => $loc_ext_port,
392                                         loc_desc      => $desc,
393                                         loc_sw_lid    => $loc_sw_lid,
394                                         rem_guid      => "0x$rem_guid",
395                                         rem_lid       => $rem_lid,
396                                         rem_port      => $rem_port,
397                                         rem_ext_port  => $rem_ext_port,
398                                         rem_desc      => $rem_desc,
399                                         rem_port_guid => $rem_port_guid
400                                 };
401                         }
402                         if ($rec) {
403                                 $rec->{rem_port_guid} =~ s/\((.*)\)/$1/;
404                                 $IBswcountlimits::link_ends{"0x$guid"}{$loc_port} = $rec;
405                         }
406                 }
407
408                 if ($line =~ /^Ca.*/ || $line =~ /^Rt.*/) { $in_switch = "no"; }
409         }
410         close IBNET_TOPO;
411 }
412
413 # =========================================================================
414 # get_num_ports(switch_guid, ca_name, ca_port)
415 #
416 sub get_num_ports
417 {
418         my $guid         = $_[0];
419         my $ca_name      = $_[1];
420         my $ca_port      = $_[2];
421         my $num_ports    = 0;
422         my $extra_params = get_ca_name_port_param_string($ca_name, $ca_port);
423
424         my $data         = `smpquery $extra_params -G nodeinfo $guid` ||
425                 die "'smpquery $extra_params -G nodeinfo $guid' faild\n";
426         my @lines        = split("\n", $data);
427         my $pkt_lifetime = "";
428         foreach my $line (@lines) {
429                 if ($line =~ /^NumPorts:\.+(.*)/) { $num_ports = $1; }
430         }
431         return ($num_ports);
432 }
433
434 # =========================================================================
435 # format_guid(guid)
436 # The diags store the guids as strings.  This converts the guid supplied
437 # to the correct string format.
438 # eg: 0x0008f10400411f56 == 0x8f10400411f56
439 #
440 sub format_guid
441 {
442         my $guid     = $_[0];
443         my $guid_str = "";
444
445         $guid =~ tr/[A-F]/[a-f]/;
446         if ($guid =~ /0x(.*)/) {
447                 $guid_str = sprintf("0x%016s", $1);
448         } else {
449                 $guid_str = sprintf("0x%016s", $guid);
450         }
451         return ($guid_str);
452 }
453
454 # =========================================================================
455 # convert_dr_to_guid(direct_route)
456 #
457 sub convert_dr_to_guid
458 {
459         my $guid = undef;
460
461         my $data = `smpquery nodeinfo -D $_[0]` ||
462                 die "'mpquery nodeinfo -D $_[0]' failed\n";
463         my @lines = split("\n", $data);
464         foreach my $line (@lines) {
465                 if ($line =~ /^PortGuid:\.+(.*)/) { $guid = $1; }
466         }
467         return format_guid($guid);
468 }
469
470 # =========================================================================
471 # get_node_type(guid_or_direct_route)
472 #
473 sub get_node_type
474 {
475         my $type      = undef;
476         my $query_arg = "smpquery nodeinfo ";
477         if ($_[0] =~ /x/) {
478                 # assume arg is a guid if contains an x
479                 $query_arg .= "-G " . $_[0];
480         } else {
481                 # assume arg is a direct path
482                 $query_arg .= "-D " . $_[0];
483         }
484
485         my $data = `$query_arg` ||
486                 die "'$query_arg' failed\n";
487         my @lines = split("\n", $data);
488         foreach my $line (@lines) {
489                 if ($line =~ /^NodeType:\.+(.*)/) { $type = $1; }
490         }
491         return $type;
492 }
493
494 # =========================================================================
495 # is_switch(guid_or_direct_route)
496 #
497 sub is_switch
498 {
499         my $node_type = &get_node_type($_[0]);
500         return ($node_type =~ /Switch/);
501 }