2 * Copyright (c) 2004-2009 Voltaire Inc. All rights reserved.
3 * Copyright (c) 2007 Xsigo Systems Inc. All rights reserved.
4 * Copyright (c) 2008 Lawrence Livermore National Lab. All rights reserved.
5 * Copyright (c) 2009 HNR Consulting. All rights reserved.
6 * Copyright (c) 2010,2011 Mellanox Technologies LTD. All rights reserved.
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
40 #endif /* HAVE_CONFIG_H */
53 #include <complib/cl_nodenamemap.h>
54 #include <infiniband/ibnetdisc.h>
55 #include <infiniband/mad.h>
57 #include "ibdiag_common.h"
58 #include "ibdiag_sa.h"
/* MAD port handle shared by the query helpers in this file; main opens it
 * with mad_rpc_open_port and the PMA helpers pass it to pma_query_via. */
60 struct ibmad_port *ibmad_port;
61 static char *node_name_map_file = NULL;
62 static nn_map_t *node_name_map = NULL;
63 static char *load_cache_file = NULL;
/* Indexed by LID; the stored value is copied into portid->sl before PMA
 * queries and is filled from SA path records (insert_lid2sl_table).
 * The size expression evaluates to 49152 entries. */
64 static uint16_t lid2sl_table[sizeof(uint8_t) * 1024 * 48] = { 0 };
65 static int obtain_sl = 1;
67 int data_counters = 0;
68 int data_counters_only = 0;
/* Target port GUID: parsed with strtoull in process_opt, or decoded from
 * NodeInfo in main when a direct route is used. */
70 uint64_t port_guid = 0;
71 char *port_guid_str = NULL;
/* Fields excluded from reporting; add_suppressed appends to this array. */
74 enum MAD_FIELDS suppressed_fields[SUP_MAX];
/* Bit mask of PRINT_* flags; main falls back to PRINT_ALL when it is zero. */
76 uint8_t node_type_to_print = 0;
77 unsigned clear_errors = 0, clear_counts = 0, details = 0;
79 #define PRINT_SWITCH 0x1
81 #define PRINT_ROUTER 0x4
82 #define PRINT_ALL 0xFF /* all nodes default flag */
/* Value assigned to ibd_timeout by path_record_query before the SA query. */
84 #define DEFAULT_HALF_WORLD_PR_TIMEOUT (3000)
/* NOTE(review): accessed as summary.pma_query_failures elsewhere, so this
 * line is presumably a member of a struct whose other lines are not visible
 * in this listing. */
91 int pma_query_failures;
94 #define DEF_THRES_FILE IBDIAG_CONFIG_PATH"/error_thresholds"
95 static char *threshold_file = DEF_THRES_FILE;
97 /* define a "packet" with threshold values in it */
/* NOTE(review): 1204 may be a transposition of 1024, the size used for the
 * other MAD buffers in this file - confirm. */
98 uint8_t thresholds[1204] = { 0 };
/* Starts as a string literal; set_thresholds replaces it with malloc-ed
 * memory, which set_thres then grows with realloc. */
99 char * threshold_str = "";
/*
 * Compare *gid against an all-zero GID.
 *
 * Returns the memcmp result converted to unsigned: zero when every byte of
 * *gid is zero, non-zero otherwise.
 * NOTE(review): the declaration of zero_gid is not visible in this listing.
 */
101 static unsigned valid_gid(ib_gid_t * gid)
104 memset(&zero_gid, 0, sizeof zero_gid);
105 return memcmp(&zero_gid, gid, sizeof(*gid));
/*
 * Record a threshold for the counter whose mad_field_name equals name.
 *
 * Scans fields IB_PC_FIRST_F through IB_PC_LAST_F (inclusive). On a match the
 * value is encoded into the global thresholds buffer and a bracketed
 * name/value entry is appended to threshold_str.
 * NOTE(review): local declarations (f, tmp, n) and the braces are not visible
 * in this listing.
 */
108 static void set_thres(char *name, uint32_t val)
113 for (f = IB_PC_FIRST_F; f <= IB_PC_LAST_F; f++) {
114 if (strcmp(name, mad_field_name(f)) == 0) {
115 mad_encode_field(thresholds, f, &val);
116 snprintf(tmp, 255, "[%s = %u]", name, val);
/* NOTE(review): the realloc result is assigned straight back to
 * threshold_str, so the previous buffer is lost if realloc fails. This also
 * assumes threshold_str already points at heap memory (set_thresholds
 * mallocs it) - confirm there is no other caller. */
117 threshold_str = realloc(threshold_str,
118 strlen(threshold_str)+strlen(tmp)+1);
119 if (!threshold_str) {
120 fprintf(stderr, "Failed to allocate memory: "
121 "%s\n", strerror(errno));
/* Append the new entry at the current end of the string. */
124 n = strlen(threshold_str);
125 strcpy(threshold_str+n, tmp);
/*
 * Load per-counter thresholds from the text file threshold_file.
 *
 * Each line is cut at the newline, then split at the first equals sign into a
 * counter name and a value; the value is parsed with strtoul (base 0) and
 * handed to set_thres. Blank lines and lines starting with a hash are skipped.
 * threshold_str is re-created on the heap with a fixed prefix before the
 * file is read.
 * NOTE(review): handling of a failed fopen, the blank-line test, any check of
 * val_str, and fclose are not visible in this listing.
 */
130 static void set_thresholds(char *threshold_file)
134 FILE *thresf = fopen(threshold_file, "r");
135 char *p_prefix, *p_last;
143 snprintf(str, 63, "Thresholds: ");
144 threshold_str = malloc(strlen(str)+1);
145 if (!threshold_str) {
146 fprintf(stderr, "Failed to allocate memory: %s\n",
150 strcpy(threshold_str, str);
151 while (fgets(buf, sizeof buf, thresf) != NULL) {
/* Strip the trailing newline left by fgets. */
152 p_prefix = strtok_r(buf, "\n", &p_last);
154 continue; /* ignore blank lines */
156 if (*p_prefix == '#')
157 continue; /* ignore comment lines */
/* Split the line into the counter name and the value text. */
159 name = strtok_r(p_prefix, "=", &p_last);
160 val_str = strtok_r(NULL, "\n", &p_last);
162 val = strtoul(val_str, NULL, 0);
163 set_thres(name, val);
/*
 * Return non-zero when val is strictly greater than the threshold stored for
 * field in the global thresholds buffer.
 * NOTE(review): the declaration of thres is not visible in this listing.
 */
169 static int exceeds_threshold(int field, unsigned val)
172 mad_decode_field(thresholds, field, &thres);
173 return (val > thres);
/*
 * Print one line of link information for node->ports[portnum]: local LID,
 * port number, link width/speed/state, and a description of the remote port
 * when one is connected.
 * NOTE(review): several lines of this function (braces, else keywords, some
 * declarations and call arguments) are not visible in this listing; the
 * comments below describe only the visible statements.
 */
176 static void print_port_config(ibnd_node_t * node, int portnum)
178 char width[64], speed[64], state[64], physstate[64];
179 char remote_str[256];
183 char ext_port_str[256];
184 int iwidth, ispeed, fdr10, espeed, istate, iphystate, cap_mask;
187 ibnd_port_t *port = node->ports[portnum];
/* Active width and speed from PortInfo; the FDR10 bit comes from the
 * extended port info. */
192 iwidth = mad_get_field(port->info, 0, IB_PORT_LINK_WIDTH_ACTIVE_F);
193 ispeed = mad_get_field(port->info, 0, IB_PORT_LINK_SPEED_ACTIVE_F);
194 fdr10 = mad_get_field(port->ext_info, 0,
195 IB_MLNX_EXT_PORT_LINK_SPEED_ACTIVE_F) & FDR10;
/* The capability mask is read from port 0 on a switch and from the port
 * itself in the other visible assignment. */
197 if (port->node->type == IB_NODE_SWITCH)
198 info = (uint8_t *)&port->node->ports[0]->info;
200 info = (uint8_t *)&port->info;
201 cap_mask = mad_get_field(info, 0, IB_PORT_CAPMASK_F);
/* The extended active speed is read only when the capability bit is set. */
202 if (cap_mask & CL_NTOH32(IB_PORT_CAP_HAS_EXT_SPEEDS))
203 espeed = mad_get_field(port->info, 0,
204 IB_PORT_LINK_SPEED_EXT_ACTIVE_F);
207 istate = mad_get_field(port->info, 0, IB_PORT_STATE_F);
208 iphystate = mad_get_field(port->info, 0, IB_PORT_PHYS_STATE_F);
210 remote_str[0] = '\0';
215 /* C14-24.2.1 states that a down port allows for invalid data to be
216 * returned for all PortInfo components except PortState and
217 * PortPhysicalState */
218 if (istate != IB_LINK_DOWN) {
221 sprintf(speed, "10.0 Gbps (FDR10)");
223 mad_dump_val(IB_PORT_LINK_SPEED_ACTIVE_F, speed,
226 mad_dump_val(IB_PORT_LINK_SPEED_EXT_ACTIVE_F, speed,
229 snprintf(link_str, 256, "(%3s %18s %6s/%8s)",
230 mad_dump_val(IB_PORT_LINK_WIDTH_ACTIVE_F, width, 64, &iwidth),
232 mad_dump_val(IB_PORT_STATE_F, state, 64, &istate),
233 mad_dump_val(IB_PORT_PHYS_STATE_F, physstate, 64, &iphystate));
/* Shorter form of the link string that carries only state and physical
 * state. */
235 snprintf(link_str, 256, "( %6s/%8s)",
236 mad_dump_val(IB_PORT_STATE_F, state, 64, &istate),
237 mad_dump_val(IB_PORT_PHYS_STATE_F, physstate, 64, &iphystate));
/* Describe the peer port when the link has one. */
240 if (port->remoteport) {
241 char *rem_node_name = NULL;
243 if (port->remoteport->ext_portnum)
244 snprintf(ext_port_str, 256, "%d",
245 port->remoteport->ext_portnum);
247 ext_port_str[0] = '\0';
249 get_max_msg(width_msg, speed_msg, 256, port);
251 rem_node_name = remap_node_name(node_name_map,
252 port->remoteport->node->guid,
253 port->remoteport->node->
/* The remote LID falls back to the remote node smalid when the remote
 * port base_lid is zero. */
256 snprintf(remote_str, 256,
257 "0x%016" PRIx64 " %6d %4d[%2s] \"%s\" (%s %s)\n",
258 port->remoteport->guid,
259 port->remoteport->base_lid ? port->remoteport->
260 base_lid : port->remoteport->node->smalid,
261 port->remoteport->portnum, ext_port_str, rem_node_name,
262 width_msg, speed_msg);
/* Placeholder text written to remote_str in place of a peer description. */
266 snprintf(remote_str, 256, " [ ] \"\" ( )\n");
/* ext_port_str is reused here for the local port. */
268 if (port->ext_portnum)
269 snprintf(ext_port_str, 256, "%d", port->ext_portnum);
271 ext_port_str[0] = '\0';
/* A switch prints the node smalid; the other visible printf uses the
 * port base_lid. */
273 if (node->type == IB_NODE_SWITCH)
274 printf(" Link info: %6d", node->smalid);
276 printf(" Link info: %6d", port->base_lid);
278 printf("%4d[%2s] ==%s==> %s",
279 port->portnum, ext_port_str, link_str, remote_str);
/*
 * Look for field among the first sup_total entries of suppressed_fields.
 * NOTE(review): the return statements are not visible in this listing;
 * callers test the result with a logical not, so it is presumably non-zero
 * on a match and zero otherwise - confirm.
 */
282 static int suppress(enum MAD_FIELDS field)
285 for (i = 0; i < sup_total; i++)
286 if (field == suppressed_fields[i])
/*
 * Print a header followed by the mad_field_name of each of the first
 * sup_total entries of suppressed_fields.
 */
291 static void report_suppressed(void)
294 printf("## Suppressed:");
295 for (i = 0; i < sup_total; i++)
296 printf(" %s", mad_field_name(suppressed_fields[i]));
/*
 * Print the run summary: nodes and ports checked, how many were bad, the
 * threshold description string, and the number of PMA query failures when
 * that count is non-zero.
 *
 * Returns summary.bad_ports; main stores this value in rc.
 */
300 static int print_summary(void)
302 printf("\n## Summary: %d nodes checked, %d bad nodes found\n",
303 summary.nodes_checked, summary.bad_nodes);
304 printf("## %d ports checked, %d ports have errors beyond threshold\n",
305 summary.ports_checked, summary.bad_ports);
306 printf("## %s\n", threshold_str);
307 if (summary.pma_query_failures)
308 printf("## %d PMA query failures\n", summary.pma_query_failures);
310 return (summary.bad_ports);
/*
 * Copy the SL of every path record in the SA query result r into
 * lid2sl_table, indexed by the record dlid after cl_ntoh16 conversion.
 * NOTE(review): no bounds check against the table size is visible in this
 * listing.
 */
313 static void insert_lid2sl_table(struct sa_query_result *r)
316 for (i = 0; i < r->result_cnt; i++) {
317 ib_path_rec_t *p_pr = (ib_path_rec_t *)sa_get_query_rec(r->p_result_madw, i);
318 lid2sl_table[cl_ntoh16(p_pr->dlid)] = ib_path_rec_sl(p_pr);
/*
 * Query the SA for path records from sgid and store the returned SLs in
 * lid2sl_table.
 *
 * sgid  - source GID; it is passed by value, so the edit below that writes
 *         dguid into it does not affect the caller.
 * dguid - destination port GUID encoded into the GUID field of the
 *         destination GID; main passes 0 for the full-fabric case.
 *
 * Callers treat a non-zero return as failure.
 * NOTE(review): the declaration of pr, the return statements and the
 * error-path structure are not visible in this listing.
 */
322 static int path_record_query(ib_gid_t sgid,uint64_t dguid)
325 ib_net64_t comp_mask = 0;
326 uint8_t reversible = 0;
327 struct sa_handle * h;
329 if (!(h = sa_get_handle()))
/* This overwrites the global ibd_timeout with the path-record timeout. */
332 ibd_timeout = DEFAULT_HALF_WORLD_PR_TIMEOUT;
333 memset(&pr, 0, sizeof(pr));
335 CHECK_AND_SET_GID(sgid, pr.sgid, PR, SGID);
/* Build the destination GID from the local copy of sgid by replacing its
 * GUID field with dguid. */
337 mad_encode_field(sgid.raw, IB_GID_GUID_F, &dguid);
338 CHECK_AND_SET_GID(sgid, pr.dgid, PR, DGID);
341 CHECK_AND_SET_VAL(1, 8, -1, pr.num_path, PR, NUMBPATH);/*to get only one PathRecord for each source and destination pair*/
342 CHECK_AND_SET_VAL(1, 8, -1, reversible, PR, REVERSIBLE);/*for a reversible path*/
/* The reversible flag is placed in bit 7 of num_path. */
343 pr.num_path |= reversible << 7;
344 struct sa_query_result result;
345 int ret = sa_query(h, IB_MAD_METHOD_GET_TABLE,
346 (uint16_t)IB_SA_ATTR_PATHRECORD,0,cl_ntoh64(comp_mask),ibd_sakey,
347 &pr, sizeof(pr), &result);
350 fprintf(stderr, "Query SA failed: %s; sa call path_query failed\n", strerror(ret));
353 if (result.status != IB_SA_MAD_STATUS_SUCCESS) {
354 sa_report_err(result.status);
359 insert_lid2sl_table(&result);
362 sa_free_result_mad(&result);
/*
 * Issue a PMA query for attr_id on portid/portnum and append the decoded
 * fields to buf as bracketed name/value pairs.
 *
 * buf, size             - destination buffer and its capacity
 * attr_name             - used only in the warning printed on failure
 * start_field,end_field - fields decoded are start_field up to but not
 *                         including end_field
 *
 * On query failure a warning is logged and summary.pma_query_failures is
 * incremented.
 * NOTE(review): the return statements are not visible in this listing;
 * print_results adds the result to its running length, so the function
 * presumably returns the number of characters written - confirm. Any
 * suppress or threshold filter inside the loop is likewise not visible.
 */
366 static int query_and_dump(char *buf, size_t size, ib_portid_t * portid,
367 char *node_name, int portnum,
368 const char *attr_name, uint16_t attr_id,
369 int start_field, int end_field)
375 memset(pc, 0, sizeof(pc));
377 if (!pma_query_via(pc, portid, portnum, ibd_timeout, attr_id,
379 IBWARN("%s query failed on %s, %s port %d", attr_name,
380 node_name, portid2str(portid), portnum);
381 summary.pma_query_failures++;
385 for (n = 0, i = start_field; i < end_field; i++) {
386 mad_decode_field(pc, i, (void *)&val);
388 n += snprintf(buf + n, size - n, " [%s == %u]",
389 mad_field_name(i), val);
/*
 * Format and print the counters of one port that exceed their thresholds.
 *
 * pc       - PortCounters reply buffer
 * pce      - extended counters buffer (may be used for the data counters)
 * cap_mask - PerfMgt capability mask used to pick the data-counter range
 * portnum  - port number; 0xFF is printed as port ALL
 * header_printed - in/out flag so the per-node header is printed once
 *
 * NOTE(review): many lines of this function are not visible in this listing
 * (declarations of str, n, val, pkt; the suppress check inside the first
 * loop; the conditions guarding the data-counter section; return values).
 */
396 static int print_results(ib_portid_t * portid, char *node_name,
397 ibnd_node_t * node, uint8_t * pc, int portnum,
398 int *header_printed, uint8_t *pce, uint16_t cap_mask)
/* Walk the error counters from IB_PC_ERR_SYM_F to IB_PC_VL15_DROPPED_F. */
405 for (n = 0, i = IB_PC_ERR_SYM_F; i <= IB_PC_VL15_DROPPED_F; i++) {
409 /* this is not a counter, skip it */
410 if (i == IB_PC_COUNTER_SELECT2_F)
413 mad_decode_field(pc, i, (void *)&val);
414 if (exceeds_threshold(i, val)) {
415 n += snprintf(str + n, 1024 - n, " [%s == %u]",
416 mad_field_name(i), val);
/* NOTE(review): the size passed below is sizeof(buf) - n while the
 * destination is str + n and other writes use 1024 - n; buf is not declared
 * in the visible lines - confirm both refer to the same capacity. */
/* NOTE(review): the PortXmitDiscardDetails query is given the
 * IB_PC_RCV_* field range and the PortRcvErrorDetails query the IB_PC_XMT_*
 * range; these look swapped - confirm against the field enum order. */
418 /* If there are PortXmitDiscards, get details (if supported) */
419 if (i == IB_PC_XMT_DISCARDS_F && details) {
420 n += query_and_dump(str + n, sizeof(buf) - n, portid,
422 "PortXmitDiscardDetails",
423 IB_GSI_PORT_XMIT_DISCARD_DETAILS,
424 IB_PC_RCV_LOCAL_PHY_ERR_F,
425 IB_PC_RCV_ERR_LAST_F);
426 /* If there are PortRcvErrors, get details (if supported) */
427 } else if (i == IB_PC_ERR_RCV_F && details) {
428 n += query_and_dump(str + n, sizeof(buf) - n, portid,
430 "PortRcvErrorDetails",
431 IB_GSI_PORT_RCV_ERROR_DETAILS,
432 IB_PC_XMT_INACT_DISC_F,
433 IB_PC_XMT_DISC_LAST_F);
/* PortXmitWait is handled separately from the loop above. */
438 if (!suppress(IB_PC_XMT_WAIT_F)) {
439 mad_decode_field(pc, IB_PC_XMT_WAIT_F, (void *)&val);
440 if (exceeds_threshold(IB_PC_XMT_WAIT_F, val))
441 n += snprintf(str + n, 1024 - n, " [%s == %u]",
442 mad_field_name(IB_PC_XMT_WAIT_F), val);
445 /* if we found errors. */
/* Data counters: default to the basic range and switch to the extended
 * range in the visible assignments below; end_field depends on
 * IB_PM_EXT_WIDTH_SUPPORTED. */
449 int start_field = IB_PC_XMT_BYTES_F;
450 int end_field = IB_PC_RCV_PKTS_F;
454 start_field = IB_PC_EXT_XMT_BYTES_F;
455 if (cap_mask & IB_PM_EXT_WIDTH_SUPPORTED)
456 end_field = IB_PC_EXT_RCV_MPKTS_F;
458 end_field = IB_PC_EXT_RCV_PKTS_F;
461 for (i = start_field; i <= end_field; i++) {
465 mad_decode_field(pkt, i, (void *)&val64);
/* The four byte counters are singled out before the human-readable
 * conversion; the statement they guard is not visible. */
468 if (i == IB_PC_EXT_XMT_BYTES_F ||
469 i == IB_PC_EXT_RCV_BYTES_F ||
470 i == IB_PC_XMT_BYTES_F ||
471 i == IB_PC_RCV_BYTES_F)
473 unit = conv_cnt_human_readable(val64,
475 n += snprintf(str + n, 1024 - n,
478 mad_field_name(i), val64, val,
/* Print the node header once; for switches it includes the port 0 GUID. */
484 if (!*header_printed) {
485 if (node->type == IB_NODE_SWITCH)
486 printf("Errors for 0x%" PRIx64 " \"%s\"\n",
487 node->ports[0]->guid, node_name);
489 printf("Errors for \"%s\"\n", node_name);
/* portnum 0xFF denotes an all-ports query. */
494 if (portnum == 0xFF) {
495 if (node->type == IB_NODE_SWITCH)
496 printf(" GUID 0x%" PRIx64 " port ALL:%s\n",
497 node->ports[0]->guid, str);
499 printf(" GUID 0x%" PRIx64 " port %d:%s\n",
500 node->ports[portnum]->guid, portnum, str);
502 print_port_config(node, portnum);
/*
 * Query the PerfMgt ClassPortInfo of portid/portnum and store its
 * CapabilityMask in *cap_mask.
 *
 * print_node treats a zero return as success. On query failure a warning is
 * logged and summary.pma_query_failures is incremented.
 * NOTE(review): the final parameter, the return statements and any byte-order
 * conversion of rc_cap_mask are not visible in this listing.
 */
509 static int query_cap_mask(ib_portid_t * portid, char *node_name, int portnum,
512 uint8_t pc[1024] = { 0 };
513 uint16_t rc_cap_mask;
/* Use the SL recorded for this LID. */
515 portid->sl = lid2sl_table[portid->lid];
517 /* PerfMgt ClassPortInfo is a required attribute */
518 if (!pma_query_via(pc, portid, portnum, ibd_timeout, CLASS_PORT_INFO,
520 IBWARN("classportinfo query failed on %s, %s port %d",
521 node_name, portid2str(portid), portnum);
522 summary.pma_query_failures++;
526 /* ClassPortInfo should be supported as part of libibmad */
/* The mask is copied from byte offset 2 of the reply buffer. */
527 memcpy(&rc_cap_mask, pc + 2, sizeof(rc_cap_mask)); /* CapabilityMask */
529 *cap_mask = rc_cap_mask;
/*
 * Query and print the data counters of one port.
 *
 * When cap_mask advertises extended-width support the extended PortCounters
 * attribute is queried and the IB_PC_EXT_* field range is printed; the other
 * visible query uses the basic PortCounters attribute and field range.
 * A failed query logs a warning and increments summary.pma_query_failures.
 * NOTE(review): the remaining parameters (header_printed and port_config are
 * referenced in the body), local declarations, else keywords and return
 * statements are not visible in this listing.
 */
533 static int print_data_cnts(ib_portid_t * portid, uint16_t cap_mask,
534 char *node_name, ibnd_node_t * node, int portnum,
539 int start_field = IB_PC_XMT_BYTES_F;
540 int end_field = IB_PC_RCV_PKTS_F;
/* Use the SL recorded for this LID. */
544 portid->sl = lid2sl_table[portid->lid];
546 if (cap_mask & (IB_PM_EXT_WIDTH_SUPPORTED | IB_PM_EXT_WIDTH_NOIETF_SUP)) {
547 if (!pma_query_via(pc, portid, portnum, ibd_timeout,
548 IB_GSI_PORT_COUNTERS_EXT, ibmad_port)) {
549 IBWARN("IB_GSI_PORT_COUNTERS_EXT query failed on %s, %s port %d",
550 node_name, portid2str(portid), portnum);
551 summary.pma_query_failures++;
/* The last extended field printed depends on IB_PM_EXT_WIDTH_SUPPORTED. */
554 start_field = IB_PC_EXT_XMT_BYTES_F;
555 if (cap_mask & IB_PM_EXT_WIDTH_SUPPORTED)
556 end_field = IB_PC_EXT_RCV_MPKTS_F;
558 end_field = IB_PC_EXT_RCV_PKTS_F;
560 if (!pma_query_via(pc, portid, portnum, ibd_timeout,
561 IB_GSI_PORT_COUNTERS, ibmad_port)) {
562 IBWARN("IB_GSI_PORT_COUNTERS query failed on %s, %s port %d",
563 node_name, portid2str(portid), portnum);
564 summary.pma_query_failures++;
567 start_field = IB_PC_XMT_BYTES_F;
568 end_field = IB_PC_RCV_PKTS_F;
/* Print the node header once per node. */
571 if (!*header_printed) {
572 printf("Data Counters for 0x%" PRIx64 " \"%s\"\n", node->guid,
578 printf(" GUID 0x%" PRIx64 " port ALL:", node->guid);
580 printf(" GUID 0x%" PRIx64 " port %d:",
581 node->guid, portnum);
583 for (i = start_field; i <= end_field; i++) {
588 mad_decode_field(pc, i, (void *)&val64);
589 if (i == IB_PC_EXT_XMT_BYTES_F || i == IB_PC_EXT_RCV_BYTES_F ||
590 i == IB_PC_XMT_BYTES_F || i == IB_PC_RCV_BYTES_F)
592 unit = conv_cnt_human_readable(val64, &val, data);
593 printf(" [%s == %" PRIu64 " (%5.3f%s)]", mad_field_name(i),
/* Link information is printed only for a specific port, not for 0xFF. */
598 if (portnum != 0xFF && port_config)
599 print_port_config(node, portnum);
/*
 * Query the PortCounters (and, when cap_mask advertises extended-width
 * support, the extended PortCounters) of one port and hand the buffers to
 * print_results.
 *
 * Returns the value of print_results on the visible return path. A failed
 * query logs a warning and increments summary.pma_query_failures.
 * NOTE(review): the remaining parameter (header_printed is referenced), the
 * declarations of pc, pce and foo, and the early-return statements are not
 * visible in this listing. pc_ext is visible only with its NULL initialiser;
 * any assignment before it is passed to print_results is not shown - confirm.
 */
604 static int print_errors(ib_portid_t * portid, uint16_t cap_mask,
605 char *node_name, ibnd_node_t * node, int portnum,
610 uint8_t *pc_ext = NULL;
613 memset(pce, 0, 1024);
/* Use the SL recorded for this LID. */
615 portid->sl = lid2sl_table[portid->lid];
617 if (!pma_query_via(pc, portid, portnum, ibd_timeout,
618 IB_GSI_PORT_COUNTERS, ibmad_port)) {
619 IBWARN("IB_GSI_PORT_COUNTERS query failed on %s, %s port %d",
620 node_name, portid2str(portid), portnum);
621 summary.pma_query_failures++;
625 if (cap_mask & (IB_PM_EXT_WIDTH_SUPPORTED | IB_PM_EXT_WIDTH_NOIETF_SUP)) {
626 if (!pma_query_via(pce, portid, portnum, ibd_timeout,
627 IB_GSI_PORT_COUNTERS_EXT, ibmad_port)) {
628 IBWARN("IB_GSI_PORT_COUNTERS_EXT query failed on %s, %s port %d",
629 node_name, portid2str(portid), portnum);
630 summary.pma_query_failures++;
636 if (!(cap_mask & IB_PM_PC_XMIT_WAIT_SUP)) {
637 /* if PortCounters:PortXmitWait not supported clear this counter */
639 mad_encode_field(pc, IB_PC_XMT_WAIT_F, &foo);
641 return (print_results(portid, node_name, node, pc, portnum,
642 header_printed, pc_ext, cap_mask));
/*
 * Send a PerfMgt SET of the extended PortCounters attribute to dest.
 *
 * rcvbuf  - MAD data buffer; its first IB_MAD_SIZE bytes are zeroed, then the
 *           port select and counter select fields are filled in. It is used
 *           as both send and receive buffer for mad_rpc.
 * port    - value written to the port select field
 * mask    - value written to the counter select field
 * timeout - RPC timeout
 * srcport - MAD port used for the RPC
 *
 * Returns the result of mad_rpc on the visible return path.
 * NOTE(review): the declaration of lid, the condition guarding the lid-routed
 * warning and its return are not visible in this listing.
 */
645 uint8_t *reset_pc_ext(void *rcvbuf, ib_portid_t * dest,
646 int port, unsigned mask, unsigned timeout,
647 const struct ibmad_port * srcport)
649 ib_rpc_t rpc = { 0 };
652 DEBUG("lid %u port %d mask 0x%x", lid, port, mask);
655 IBWARN("only lid routed is supported");
662 rpc.mgtclass = IB_PERFORMANCE_CLASS;
663 rpc.method = IB_MAD_METHOD_SET;
664 rpc.attr.id = IB_GSI_PORT_COUNTERS_EXT;
666 memset(rcvbuf, 0, IB_MAD_SIZE);
668 /* Same for attribute IDs */
669 mad_set_field(rcvbuf, 0, IB_PC_EXT_PORT_SELECT_F, port);
670 mad_set_field(rcvbuf, 0, IB_PC_EXT_COUNTER_SELECT_F, mask);
672 rpc.timeout = timeout;
673 rpc.datasz = IB_PC_DATA_SZ;
674 rpc.dataoffs = IB_PC_DATA_OFFS;
678 dest->qkey = IB_DEFAULT_QP1_QKEY;
680 return mad_rpc(srcport, &rpc, dest, rcvbuf, rcvbuf);
/*
 * Reset counters on one port of portid.
 *
 * The visible statements reset the PortCounters attribute with a counter
 * select mask, reset the two detail attributes (PortXmitDiscardDetails with
 * mask 0xf, PortRcvErrorDetails with mask 0x3f) when both clear_errors and
 * details are set, and reset the extended counters through reset_pc_ext when
 * cap_mask advertises extended-width support. Failures of the PortCounters
 * and extended resets are reported on stderr.
 * NOTE(review): the declaration and computation of mask, the conditions that
 * select error versus data counters, and the end of the comment that opens
 * inside this function are not visible in this listing.
 */
683 static void clear_port(ib_portid_t * portid, uint16_t cap_mask,
684 char *node_name, int port)
686 uint8_t pc[1024] = { 0 };
687 /* bits defined in Table 228 PortCounters CounterSelect and
694 if (cap_mask & IB_PM_PC_XMIT_WAIT_SUP)
701 if (!performance_reset_via(pc, portid, port, mask, ibd_timeout,
702 IB_GSI_PORT_COUNTERS, ibmad_port))
703 fprintf(stderr, "Failed to reset errors %s port %d\n", node_name,
706 if (clear_errors && details) {
708 performance_reset_via(pc, portid, port, 0xf, ibd_timeout,
709 IB_GSI_PORT_XMIT_DISCARD_DETAILS,
712 performance_reset_via(pc, portid, port, 0x3f, ibd_timeout,
713 IB_GSI_PORT_RCV_ERROR_DETAILS,
719 (IB_PM_EXT_WIDTH_SUPPORTED | IB_PM_EXT_WIDTH_NOIETF_SUP))) {
720 if (cap_mask & IB_PM_EXT_WIDTH_SUPPORTED)
725 if (!reset_pc_ext(pc, portid, port, mask, ibd_timeout,
727 fprintf(stderr, "Failed to reset extended data counters %s, "
728 "%s port %d\n", node_name, portid2str(portid),
// Per-node callback (main passes it to ibnd_iter_nodes and also calls it
// directly). Filters the node by type against node_type_to_print, queries the
// PerfMgt capability mask, then prints data counters or errors for its ports
// and updates the summary counters.
// user_data is not referenced in the visible lines.
// NOTE(review): many lines of this function (the switch cases, declarations
// of type, p and startport, several conditions and braces) are not visible
// in this listing; comments describe only the visible statements.
733 void print_node(ibnd_node_t * node, void *user_data)
735 int header_printed = 0;
739 int all_port_sup = 0;
740 ib_portid_t portid = { 0 };
741 uint16_t cap_mask = 0;
742 char *node_name = NULL;
744 switch (node->type) {
// Skip nodes whose type bit is not selected.
756 if ((type & node_type_to_print) == 0)
759 if (node->type == IB_NODE_SWITCH && node->smaenhsp0)
762 node_name = remap_node_name(node_name_map, node->guid, node->nodedesc);
// A switch is addressed through the node smalid; the loop below sets the
// portid from the base_lid of a populated port.
764 if (node->type == IB_NODE_SWITCH) {
765 ib_portid_set(&portid, node->smalid, 0, 0);
768 for (p = 1; p <= node->numports; p++) {
769 if (node->ports[p]) {
770 ib_portid_set(&portid,
771 node->ports[p]->base_lid,
// The capability mask is tested for the all-port-select bit.
778 if ((query_cap_mask(&portid, node_name, p, &cap_mask) == 0) &&
779 (cap_mask & IB_PM_ALL_PORT_SELECT))
// Data-counters-only mode: one print_data_cnts call per populated port.
782 if (data_counters_only) {
783 for (p = startport; p <= node->numports; p++) {
784 if (node->ports[p]) {
785 if (node->type == IB_NODE_SWITCH)
786 ib_portid_set(&portid, node->smalid, 0, 0);
788 ib_portid_set(&portid, node->ports[p]->base_lid,
791 print_data_cnts(&portid, cap_mask, node_name, node, p,
793 summary.ports_checked++;
795 clear_port(&portid, cap_mask, node_name, p);
// All-ports query with port 0xFF; when print_errors returns zero every
// port of the node is counted as checked.
800 if (!print_errors(&portid, cap_mask, node_name, node,
801 0xFF, &header_printed)) {
802 summary.ports_checked += node->numports;
// Per-port error query.
806 for (p = startport; p <= node->numports; p++) {
807 if (node->ports[p]) {
808 if (node->type == IB_NODE_SWITCH)
809 ib_portid_set(&portid, node->smalid, 0, 0);
811 ib_portid_set(&portid, node->ports[p]->base_lid,
814 print_errors(&portid, cap_mask, node_name, node, p,
816 summary.ports_checked++;
818 clear_port(&portid, cap_mask, node_name, p);
824 summary.nodes_checked++;
826 clear_port(&portid, cap_mask, node_name, 0xFF);
// Append field to suppressed_fields and advance sup_total.
// When sup_total has already reached SUP_MAX a warning naming the field is
// logged.
// NOTE(review): the return that presumably follows the warning is not visible
// in this listing - confirm the array cannot be overrun.
831 static void add_suppressed(enum MAD_FIELDS field)
833 if (sup_total >= SUP_MAX) {
834 IBWARN("Maximum (%d) fields have been suppressed; skipping %s",
835 sup_total, mad_field_name(field));
838 suppressed_fields[sup_total++] = field;
// Parse a comma-separated list of counter names from str.
// Works on a strdup copy so str itself is not modified by strtok_r. Each
// token is compared against mad_field_name for every field from
// IB_PC_FIRST_F to IB_PC_LAST_F inclusive.
// NOTE(review): the loop over tokens, the action taken on a match
// (presumably add_suppressed), the check of the strdup result and the free
// of tmp are not visible in this listing.
841 static void calculate_suppressed_fields(char *str)
844 char *val, *lasts = NULL;
845 char *tmp = strdup(str);
847 val = strtok_r(tmp, ",", &lasts);
849 for (f = IB_PC_FIRST_F; f <= IB_PC_LAST_F; f++)
850 if (strcmp(val, mad_field_name(f)) == 0)
852 val = strtok_r(NULL, ",", &lasts);
// Option callback passed to ibdiag_process_opts by main.
// context is the struct ibnd_config being filled in, ch the option code and
// optarg its argument. The visible statements store option values in the
// file-scope settings (suppression list, node name map file, node type mask,
// cache file, threshold file, data_counters_only, port GUID, direct route
// path) and in cfg->max_smps.
// NOTE(review): the switch statement, its case labels and the return values
// are not visible in this listing, so which option code triggers which
// statement cannot be confirmed from here.
858 static int process_opt(void *context, int ch, char *optarg)
860 struct ibnd_config *cfg = context;
863 calculate_suppressed_fields(optarg);
866 /* Right now this is the only "common" error */
867 add_suppressed(IB_PC_ERR_SWITCH_REL_F);
870 node_name_map_file = strdup(optarg);
// Each of the three statements below sets one bit of the node type mask.
876 node_type_to_print |= PRINT_SWITCH;
879 node_type_to_print |= PRINT_CA;
882 node_type_to_print |= PRINT_ROUTER;
888 load_cache_file = strdup(optarg);
891 threshold_file = strdup(optarg);
894 data_counters_only = 1;
// port_guid_str keeps pointing at optarg itself (no copy is made); the
// numeric value is parsed with base auto-detection.
901 port_guid_str = optarg;
902 port_guid = strtoull(optarg, 0, 0);
905 dr_path = strdup(optarg);
919 cfg->max_smps = strtoul(optarg, NULL, 0);
/*
 * Program entry point.
 *
 * Visible flow: parse options, open the MAD port, optionally resolve a
 * target (direct route or port GUID), load or discover the fabric, load the
 * thresholds, reopen the MAD port, run print_node for the selected node or
 * for every node, print the summary, and release resources.
 * NOTE(review): many lines of this function (declarations, conditions, else
 * keywords, labels, returns) are not visible in this listing; comments
 * describe only the visible statements.
 */
928 int main(int argc, char **argv)
930 struct ibnd_config config = { 0 };
932 ib_portid_t portid = { 0 };
933 ib_portid_t self_portid = { 0 };
935 ibnd_fabric_t *fabric = NULL;
939 int mgmt_classes[4] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS,
/* Command line option table handed to ibdiag_process_opts. */
943 const struct ibdiag_opt opts[] = {
944 {"suppress", 's', 1, "<err1,err2,...>",
945 "suppress errors listed"},
946 {"suppress-common", 'c', 0, NULL,
947 "suppress some of the common counters"},
948 {"node-name-map", 1, 1, "<file>", "node name map file"},
949 {"port-guid", 'G', 1, "<port_guid>",
950 "report the node containing the port specified by <port_guid>"},
951 {"", 'S', 1, "<port_guid>",
952 "Same as \"-G\" for backward compatibility"},
953 {"Direct", 'D', 1, "<dr_path>",
954 "report the node containing the port specified by <dr_path>"},
955 {"skip-sl", 10, 0, NULL,"don't obtain SL to all destinations"},
956 {"report-port", 'r', 0, NULL,
957 "report port link information"},
958 {"threshold-file", 8, 1, NULL,
959 "specify an alternate threshold file, default: " DEF_THRES_FILE},
960 {"GNDN", 'R', 0, NULL,
961 "(This option is obsolete and does nothing)"},
962 {"data", 2, 0, NULL, "include data counters for ports with errors"},
963 {"switch", 3, 0, NULL, "print data for switches only"},
964 {"ca", 4, 0, NULL, "print data for CA's only"},
965 {"router", 5, 0, NULL, "print data for routers only"},
966 {"details", 6, 0, NULL, "include transmit discard details"},
967 {"counters", 9, 0, NULL, "print data counters only"},
968 {"clear-errors", 'k', 0, NULL,
969 "Clear error counters after read"},
970 {"clear-counts", 'K', 0, NULL,
971 "Clear data counters after read"},
972 {"load-cache", 7, 1, "<file>",
973 "filename of ibnetdiscover cache to load"},
974 {"outstanding_smps", 'o', 1, NULL,
975 "specify the number of outstanding SMP's which should be "
976 "issued during the scan"},
979 char usage_args[] = "";
/* Start with an empty suppression list, then parse the command line. */
981 memset(suppressed_fields, 0, sizeof suppressed_fields);
982 ibdiag_process_opts(argc, argv, &config, "cDGKLnRrSs", opts, process_opt,
/* With no node type selected, report every node type. */
988 if (!node_type_to_print)
989 node_type_to_print = PRINT_ALL;
991 ibmad_port = mad_rpc_open_port(ibd_ca, ibd_ca_port, mgmt_classes, 4);
993 IBEXIT("Failed to open port; %s:%d\n", ibd_ca, ibd_ca_port);
995 smp_mkey_set(ibmad_port, ibd_mkey);
998 mad_rpc_set_timeout(ibmad_port, ibd_timeout);
999 config.timeout_ms = ibd_timeout;
1002 config.flags = ibd_ibnetdisc_flags;
1003 config.mkey = ibd_mkey;
/* A cached fabric and a direct route path cannot be combined. */
1005 if (dr_path && load_cache_file) {
1006 mad_rpc_close_port(ibmad_port);
1007 fprintf(stderr, "Cannot specify cache and direct route path\n");
1011 if (resolve_self(ibd_ca, ibd_ca_port, &self_portid, &port, &self_gid.raw) < 0) {
1012 mad_rpc_close_port(ibmad_port);
1013 IBEXIT("can't resolve self port %s", argv[0]);
1016 node_name_map = open_node_name_map(node_name_map_file);
1018 /* limit the scan the fabric around the target */
1021 resolve_portid_str(ibd_ca, ibd_ca_port, &portid, dr_path,
1022 IB_DEST_DRPATH, NULL, ibmad_port)) < 0)
1023 IBWARN("Failed to resolve %s; attempting full scan",
1025 } else if (port_guid_str) {
1027 resolve_portid_str(ibd_ca, ibd_ca_port, &portid,
1028 port_guid_str, IB_DEST_GUID, ibd_sm_id,
1030 IBWARN("Failed to resolve %s; attempting full scan",
/* Record the SL of the resolved target under its LID. */
1033 lid2sl_table[portid.lid] = portid.sl;
/* The port is closed here and reopened further down, after the fabric
 * has been loaded or discovered. */
1036 mad_rpc_close_port(ibmad_port);
1038 if (load_cache_file) {
1039 if ((fabric = ibnd_load_fabric(load_cache_file, 0)) == NULL) {
1040 fprintf(stderr, "loading cached fabric failed\n");
/* A resolved target limits discovery to max_hops (1 when not already set);
 * if that fails, or no fabric exists yet, a further discovery is tried. */
1045 if (resolved >= 0) {
1046 if (!config.max_hops)
1047 config.max_hops = 1;
1048 if (!(fabric = ibnd_discover_fabric(ibd_ca, ibd_ca_port,
1050 IBWARN("Single node discover failed;"
1051 " attempting full scan");
1054 if (!fabric && !(fabric = ibnd_discover_fabric(ibd_ca,
1058 fprintf(stderr, "discover failed\n");
1064 set_thresholds(threshold_file);
1066 /* reopen the global ibmad_port */
1067 ibmad_port = mad_rpc_open_port(ibd_ca, ibd_ca_port,
1070 ibnd_destroy_fabric(fabric);
1071 close_node_name_map(node_name_map);
1072 IBEXIT("Failed to reopen port: %s:%d\n",
1073 ibd_ca, ibd_ca_port);
1076 smp_mkey_set(ibmad_port, ibd_mkey);
1079 mad_rpc_set_timeout(ibmad_port, ibd_timeout);
/* Three reporting modes: a port GUID target, a direct route target, or the
 * whole fabric. */
1081 if (port_guid_str) {
1082 ibnd_port_t *port = ibnd_find_port_guid(fabric, port_guid);
1084 print_node(port->node, NULL);
1086 fprintf(stderr, "Failed to find node: %s\n",
1088 } else if (dr_path) {
1090 uint8_t ni[IB_SMP_DATA_SIZE] = { 0 };
1091 if (!smp_query_via(ni, &portid, IB_ATTR_NODE_INFO, 0,
1092 ibd_timeout, ibmad_port)) {
1093 fprintf(stderr, "Failed to query local Node Info\n");
1094 goto destroy_fabric;
/* The port GUID of the direct-route target is taken from its NodeInfo. */
1097 mad_decode_field(ni, IB_NODE_PORT_GUID_F, &(port_guid));
1099 port = ibnd_find_port_guid(fabric, port_guid);
1102 if(path_record_query(self_gid,port->guid))
1103 goto destroy_fabric;
1104 print_node(port->node, NULL);
1106 fprintf(stderr, "Failed to find node: %s\n", dr_path);
/* Whole fabric: path records are requested with a destination GUID of 0
 * before iterating over every node. */
1109 if(path_record_query(self_gid,0))
1110 goto destroy_fabric;
1112 ibnd_iter_nodes(fabric, print_node, NULL);
1115 rc = print_summary();
1120 mad_rpc_close_port(ibmad_port);
1121 ibnd_destroy_fabric(fabric);
1124 close_node_name_map(node_name_map);