2 * Copyright (c) 2004-2009 Voltaire Inc. All rights reserved.
3 * Copyright (c) 2009 HNR Consulting. All rights reserved.
4 * Copyright (c) 2010,2011 Mellanox Technologies LTD. All rights reserved.
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
38 #endif /* HAVE_CONFIG_H */
46 #include <netinet/in.h>
49 #include <infiniband/umad.h>
50 #include <infiniband/mad.h>
51 #include <complib/cl_nodenamemap.h>
53 #include "ibdiag_common.h"
/* Port handle opened by mad_rpc_open_port() in main() and handed to the
 * smp_query_via() calls in this file, for example in get_node(). */
55 struct ibmad_port *srcport;
/* Printable node type names; the dump routines index this table with
 * node->type after comparing the type against IB_NODE_MAX. */
59 static char *node_type_str[] = {
/* Timeout passed to smp_query_via(); main() copies ibd_timeout into it. */
67 static int timeout = 0; /* ms */
/* Node name map: the file name is duplicated from an option argument in
 * process_opt(), the map is opened in main() and read through
 * remap_node_name() when node names are printed. */
71 static char *node_name_map_file = NULL;
72 static nn_map_t *node_name_map = NULL;
74 typedef struct Port Port;
75 typedef struct Switch Switch;
76 typedef struct Node Node;
110 uint64_t nodeguid; /* also portguid */
/* Heads of the per-distance node lists: new_node() pushes nodes here
 * (chained through dnext) and find_mcpath() walks them by distance. */
115 Node *nodesdist[MAXHOPS];
/* Port GUID that new_node() compares against to recognise the target;
 * main() fills it from find_target_portguid(). */
116 uint64_t target_portguid;
120 * Checks whether or not the port state is other than active.
121 * The "sw" argument is only relevant when the port is on a
122 * switch; for HCAs and routers, this argument is ignored.
123 * Returns 1 when port is not active and 0 when active.
124 * Base switch port 0 is considered always active.
/* The test treats any port state other than 4 as not active, but only
 * for non-switch nodes or for switches whose SwitchInfo has the enhanced
 * port 0 field set (sw->enhsp0, decoded in switch_lookup()).
 * NOTE(review): the inner node->type == IB_NODE_SWITCH comparison is
 * already implied once the left operand of || is false, so it is
 * redundant. */
126 static int is_port_inactive(Node * node, Port * port, Switch * sw)
129 if (port->state != 4 &&
130 (node->type != IB_NODE_SWITCH ||
131 (node->type == IB_NODE_SWITCH && sw->enhsp0)))
/* Reads NodeInfo, NodeDescription and PortInfo from the device addressed
 * by portid (all through srcport, with the global timeout) and decodes
 * selected fields into node and port.  The raw attribute buffers live in
 * node->nodeinfo, node->nodedesc and port->portinfo and are cleared
 * before each query.  Callers treat a negative return value as failure. */
136 static int get_node(Node * node, Port * port, ib_portid_t * portid)
138 void *pi = port->portinfo, *ni = node->nodeinfo, *nd = node->nodedesc;
141 memset(ni, 0, sizeof(node->nodeinfo));
142 if (!smp_query_via(ni, portid, IB_ATTR_NODE_INFO, 0, timeout, srcport))
145 memset(nd, 0, sizeof(node->nodedesc));
146 if (!smp_query_via(nd, portid, IB_ATTR_NODE_DESC, 0, timeout, srcport))
/* Scan over the first 64 bytes of the node description. */
149 for (s = nd, e = s + 64; s < e; s++) {
/* PortInfo is requested with attribute modifier 0. */
156 memset(pi, 0, sizeof(port->portinfo));
157 if (!smp_query_via(pi, portid, IB_ATTR_PORT_INFO, 0, timeout, srcport))
/* Node level fields come from NodeInfo. */
160 mad_decode_field(ni, IB_NODE_GUID_F, &node->nodeguid);
161 mad_decode_field(ni, IB_NODE_TYPE_F, &node->type);
162 mad_decode_field(ni, IB_NODE_NPORTS_F, &node->numports);
/* Port GUID and port number also come from NodeInfo; LID, LMC and
 * state come from PortInfo. */
164 mad_decode_field(ni, IB_NODE_PORT_GUID_F, &port->portguid);
165 mad_decode_field(ni, IB_NODE_LOCAL_PORT_F, &port->portnum);
166 mad_decode_field(pi, IB_PORT_LID_F, &port->lid);
167 mad_decode_field(pi, IB_PORT_LMC_F, &port->lmc);
168 mad_decode_field(pi, IB_PORT_STATE_F, &port->state);
170 DEBUG("portid %s: got node %" PRIx64 " '%s'", portid2str(portid),
171 node->nodeguid, node->nodedesc);
/* Looks up the port a switch forwards a unicast LID to.  SwitchInfo is
 * read into sw->switchinfo and its linear FDB capacity, linear FDB top
 * and enhanced port 0 fields are decoded into sw.  The linear forwarding
 * table block lid / 64 is then read into sw->fdb and entry lid % 64 of
 * that block is returned.  find_route() treats a negative result, or one
 * larger than the port count of the node, as a bad table entry. */
175 static int switch_lookup(Switch * sw, ib_portid_t * portid, int lid)
177 void *si = sw->switchinfo, *fdb = sw->fdb;
179 memset(si, 0, sizeof(sw->switchinfo));
180 if (!smp_query_via(si, portid, IB_ATTR_SWITCH_INFO, 0, timeout,
184 mad_decode_field(si, IB_SW_LINEAR_FDB_CAP_F, &sw->linearcap);
185 mad_decode_field(si, IB_SW_LINEAR_FDB_TOP_F, &sw->linearFDBtop);
186 mad_decode_field(si, IB_SW_ENHANCED_PORT0_F, &sw->enhsp0);
/* NOTE(review): the range test only fires when the LID is beyond both
 * the capacity and the top value; confirm that && rather than || is
 * intended. */
188 if (lid >= sw->linearcap && lid > sw->linearFDBtop)
191 memset(fdb, 0, sizeof(sw->fdb));
192 if (!smp_query_via(fdb, portid, IB_ATTR_LINEARFORWTBL, lid / 64,
196 DEBUG("portid %s: forward lid %d to port %d",
197 portid2str(portid), lid, sw->fdb[lid % 64]);
198 return sw->fdb[lid % 64];
/* Returns non-zero when a and b denote the same port: either their port
 * GUIDs are equal or, when the force flag is set, their LIDs are equal. */
201 static int sameport(Port * a, Port * b)
203 return a->portguid == b->portguid || (force && a->lid == b->lid);
/* Adds one hop to a directed-route path: nextport is stored, truncated
 * to 8 bits, at path->p[path->cnt].  The length is first checked against
 * the size of the path->p array; callers treat a negative return value
 * as failure.
 * NOTE(review): sizeof yields an unsigned value, so path->cnt + 2 is
 * presumably compared as unsigned here - confirm the type of cnt. */
206 static int extend_dpath(ib_dr_path_t * path, int nextport)
208 if (path->cnt + 2 >= sizeof(path->p))
211 path->p[path->cnt] = (uint8_t) nextport;
/* Prints one end point of a trace to the stream f, prefixed by prompt.
 * Two layouts are visible: a short one with only the node GUID and port
 * number, and a long one that adds the node type name, the LID range
 * and the node name obtained through remap_node_name().  Presumably the
 * dump argument selects between them - TODO confirm.  For switches the
 * port number is always printed as 0.  The upper end of the LID range is
 * lid + 2^lmc - 1. */
215 static void dump_endnode(int dump, char *prompt, Node * node, Port * port)
217 char *nodename = NULL;
222 fprintf(f, "%s {0x%016" PRIx64 "}[%d]\n",
223 prompt, node->nodeguid,
224 node->type == IB_NODE_SWITCH ? 0 : port->portnum);
229 remap_node_name(node_name_map, node->nodeguid, node->nodedesc);
231 fprintf(f, "%s %s {0x%016" PRIx64 "} portnum %d lid %u-%u \"%s\"\n",
233 (node->type <= IB_NODE_MAX ? node_type_str[node->type] : "???"),
235 node->type == IB_NODE_SWITCH ? 0 : port->portnum, port->lid,
236 port->lid + (1 << port->lmc) - 1, nodename);
/* Prints one hop of a unicast trace: the output port taken on the
 * previous node followed by the port that was reached.  As in
 * dump_endnode() there is a short layout (port GUID and port number) and
 * a long layout (node type name, LID range and mapped node name).  The
 * first test involves dump and ibverbose; presumably nothing is printed
 * when both are zero - TODO confirm. */
241 static void dump_route(int dump, Node * node, int outport, Port * port)
243 char *nodename = NULL;
245 if (!dump && !ibverbose)
249 remap_node_name(node_name_map, node->nodeguid, node->nodedesc);
252 fprintf(f, "[%d] -> {0x%016" PRIx64 "}[%d]\n",
253 outport, port->portguid, port->portnum);
255 fprintf(f, "[%d] -> %s port {0x%016" PRIx64
256 "}[%d] lid %u-%u \"%s\"\n", outport,
258 IB_NODE_MAX ? node_type_str[node->type] : "???"),
259 port->portguid, port->portnum, port->lid,
260 port->lid + (1 << port->lmc) - 1, nodename);
/* Traces the unicast route from the port addressed by from towards the
 * port addressed by to.  The directed-route path in from->drpath is
 * extended one hop at a time (extend_dpath() followed by get_node()), so
 * the caller gets the discovered path back in from; main() relies on
 * this when it copies my_portid into src_portid.  dump is forwarded to
 * dump_endnode() and dump_route().  Callers treat a negative return
 * value as failure; the IBWARN() calls at the end report the individual
 * failure causes (no route within MAXHOPS, bad port state, bad output
 * port state, bad forwarding table entry, path too long). */
265 static int find_route(ib_portid_t * from, ib_portid_t * to, int dump)
267 Node *node, fromnode, tonode, nextnode;
268 Port *port, fromport, toport, nextport;
270 int maxhops = MAXHOPS;
271 int portnum, outport = 255, next_sw_outport = 255;
273 memset(&fromnode,0,sizeof(Node));
274 memset(&tonode,0,sizeof(Node));
275 memset(&nextnode,0,sizeof(Node));
276 memset(&fromport,0,sizeof(Port));
277 memset(&toport,0,sizeof(Port));
278 memset(&nextport,0,sizeof(Port));
280 DEBUG("from %s", portid2str(from));
/* Both end points are queried before the walk starts. */
282 if (get_node(&fromnode, &fromport, from) < 0 ||
283 get_node(&tonode, &toport, to) < 0) {
284 IBWARN("can't reach to/from ports");
/* The target LID is overridden with to->lid; judging by the warning
 * text this presumably happens under the force option. */
288 toport.lid = to->lid;
289 IBWARN("Force: look for lid %d", to->lid);
294 portnum = port->portnum;
296 dump_endnode(dump, "From", node, port);
/* Starting on a switch: look up the first output port right away. */
297 if (node->type == IB_NODE_SWITCH) {
298 next_sw_outport = switch_lookup(&sw, from, to->lid);
299 if (next_sw_outport < 0 || next_sw_outport > node->numports) {
300 /* Need to print the port in badtbl */
301 outport = next_sw_outport;
307 if (is_port_inactive(node, port, &sw))
310 if (sameport(port, &toport))
/* Switch: follow the output port found by the previous lookup. */
313 if (node->type == IB_NODE_SWITCH) {
314 DEBUG("switch node");
315 outport = next_sw_outport;
317 if (extend_dpath(&from->drpath, outport) < 0)
320 if (get_node(&nextnode, &nextport, from) < 0) {
321 IBWARN("can't reach port at %s",
326 if (!sameport(&nextport, &toport))
329 break; /* found SMA port */
331 } else if ((node->type == IB_NODE_CA) ||
332 (node->type == IB_NODE_ROUTER)) {
336 DEBUG("ca or router node");
/* A CA or router other than the starting one ends the walk. */
337 if (!sameport(port, &fromport)) {
339 ("can't continue: reached CA or router port %"
340 PRIx64 ", lid %d", port->portguid,
344 /* we are at CA or router "from" - go one hop back to (hopefully) a switch */
345 if (from->drpath.cnt > 0) {
346 DEBUG("ca or router node - return back 1 hop");
351 && extend_dpath(&from->drpath, portnum) < 0)
354 if (get_node(&nextnode, &nextport, from) < 0) {
355 IBWARN("can't reach port at %s",
359 /* fix port num to be seen from the CA or router side */
362 from->drpath.p[from->drpath.cnt + 1];
364 /* only if the next node is a switch, get switch info */
365 if (nextnode.type == IB_NODE_SWITCH) {
366 next_sw_outport = switch_lookup(&sw, from, to->lid);
367 if (next_sw_outport < 0 ||
368 next_sw_outport > nextnode.numports) {
369 /* needed to print the port in badtbl */
370 outport = next_sw_outport;
376 if (is_port_inactive(&nextnode, port, &sw))
379 portnum = port->portnum;
380 dump_route(dump, node, outport, port);
/* Failure and completion reports. */
384 IBWARN("no route found after %d hops", MAXHOPS);
387 dump_endnode(dump, "To", node, port);
391 IBWARN("Bad port state found: node \"%s\" port %d state %d",
392 clean_nodedesc(node->nodedesc), portnum, port->state);
395 IBWARN("Bad out port state found: node \"%s\" outport %d state %d",
396 clean_nodedesc(node->nodedesc), outport, port->state);
400 ("Bad forwarding table entry found at: node \"%s\" lid entry %d is %d (top %d)",
401 clean_nodedesc(node->nodedesc), to->lid, outport, sw.linearFDBtop);
404 IBWARN("Direct path too long!");
408 /**************************
// Folds a 64-bit GUID into 32 bits: the low half times 101 is XORed with
// the high half times 103.  insert_node() reduces the result modulo HTSZ.
412 #define HASHGUID(guid) ((uint32_t)(((uint32_t)(guid) * 101) ^ ((uint32_t)((guid) >> 32) * 103)))
// Records a node in a function-local static hash table (HTSZ buckets,
// chained through htnext) keyed by node GUID.  The bucket is searched
// first for an entry with the same GUID; a new node is pushed at the
// head of its bucket.  new_node() treats a negative return value as
// meaning that the node is already known.
415 static int insert_node(Node * new)
417 static Node *nodestbl[HTSZ];
418 int hash = HASHGUID(new->nodeguid) % HTSZ;
421 for (node = nodestbl[hash]; node; node = node->htnext)
422 if (node->nodeguid == new->nodeguid) {
423 DEBUG("node %" PRIx64 " already exists", new->nodeguid);
427 new->htnext = nodestbl[hash];
428 nodestbl[hash] = new;
// Reads PortInfo for port number portnum of the device addressed by
// portid and decodes LID, LMC, state and physical state into port;
// port->portnum is set to portnum before the query.  find_mcpath()
// treats a negative return value as failure.
433 static int get_port(Port * port, int portnum, ib_portid_t * portid)
435 char portinfo[64] = { 0 };
438 port->portnum = portnum;
440 if (!smp_query_via(pi, portid, IB_ATTR_PORT_INFO, portnum, timeout,
444 mad_decode_field(pi, IB_PORT_LID_F, &port->lid);
445 mad_decode_field(pi, IB_PORT_LMC_F, &port->lmc);
446 mad_decode_field(pi, IB_PORT_STATE_F, &port->state);
447 mad_decode_field(pi, IB_PORT_PHYS_STATE_F, &port->physstate);
449 VERBOSE("portid %s portnum %d: lid %d state %d physstate %d",
450 portid2str(portid), portnum, port->lid, port->state,
// Links port in front of the port list of node: port->next is pointed at
// the current list head, node->ports.
455 static void link_port(Port * port, Node * node)
457 port->next = node->ports;
// Registers a discovered node for the multicast search.
// - When the port GUID equals target_portguid the node is tagged with
//   dist = -1, the port is linked to it and 1 is returned.
// - When insert_node() reports the node as already known, -1 is returned.
// - Otherwise the port is linked to the node and the node is pushed onto
//   the list for its distance, nodesdist[dist], chained through dnext.
// The path argument is not used in the lines visible here.
461 static int new_node(Node * node, Port * port, ib_portid_t * path, int dist)
463 if (port->portguid == target_portguid) {
464 node->dist = -1; /* tag as target */
465 link_port(port, node);
466 dump_endnode(ibverbose, "found target", node, port);
467 return 1; /* found; */
470 /* BFS search start with my self */
471 if (insert_node(node) < 0)
472 return -1; /* known switch */
474 VERBOSE("insert dist %d node %p port %d lid %d", dist, node,
475 port->portnum, port->lid);
477 link_port(port, node);
481 node->dnext = nodesdist[dist];
482 nodesdist[dist] = node;
/* Collects the ports to which a switch forwards a multicast LID.
 * SwitchInfo is read first to obtain the multicast forwarding table
 * capacity (sw.mccap), and mlid is checked against it.  The multicast
 * forwarding table is then fetched once per group of 16 ports; the group
 * index is placed in bits 28 and up of the attribute modifier.  In each
 * fetched block the 16-bit port mask is read from entry mlid % 32 and
 * converted with ntohs().  Judging by the VERBOSE message, mlid is an
 * offset from 0xc000 at this point.  Callers treat a negative return
 * value as an error and read the per-port results from their map
 * argument. */
487 static int switch_mclookup(Node * node, ib_portid_t * portid, int mlid,
492 void *si = sw.switchinfo;
493 uint16_t *msets = (uint16_t *) mdb;
494 int maxsets, block, i, set;
498 memset(si, 0, sizeof(sw.switchinfo));
499 if (!smp_query_via(si, portid, IB_ATTR_SWITCH_INFO, 0, timeout,
505 mad_decode_field(si, IB_SW_MCAST_FDB_CAP_F, &sw.mccap);
507 if (mlid >= sw.mccap)
511 maxsets = (node->numports + 15) / 16; /* round up */
513 for (set = 0; set < maxsets; set++) {
514 memset(mdb, 0, sizeof(mdb));
515 if (!smp_query_via(mdb, portid, IB_ATTR_MULTICASTFORWTBL,
516 block | (set << 28), timeout, srcport))
/* One iteration per port of the current group of 16. */
519 for (i = 0; i < 16; i++, map++) {
520 uint16_t mask = ntohs(msets[mlid % 32]);
525 VERBOSE("Switch guid 0x%" PRIx64
526 ": mlid 0x%x is forwarded to port %d",
527 node->nodeguid, mlid + 0xc000, i + set * 16);
535 * Returns a Node pointer when the target is found (main() hands it to dump_mcpath()) and 0 when it is not.
/* Searches for the multicast path of mlid from the port addressed by
 * from to the port whose GUID is stored in target_portguid.  Nodes are
 * visited by increasing distance through the nodesdist[] lists, up to
 * MAXHOPS.  For a visited switch the forwarding ports are obtained with
 * switch_mclookup(); each flagged port other than the up port is probed
 * with get_port(), the path is extended to the neighbour, the neighbour
 * is read with get_node() and registered with new_node().  Nodes and
 * ports are allocated with calloc() and IBEXIT() is called when an
 * allocation fails.  The function returns 0 when the start node is the
 * target but is not a switch, and when nothing is found; main() treats a
 * null result as failure and passes any other result to dump_mcpath(). */
537 static Node *find_mcpath(ib_portid_t * from, int mlid)
539 Node *node, *remotenode;
540 Port *port, *remoteport;
543 int dist = 0, leafport = 0;
546 DEBUG("from %s", portid2str(from));
548 if (!(node = calloc(1, sizeof(Node))))
549 IBEXIT("out of memory");
551 if (!(port = calloc(1, sizeof(Port))))
552 IBEXIT("out of memory");
554 if (get_node(node, port, from) < 0) {
555 IBWARN("can't reach node %s", portid2str(from));
559 node->upnode = 0; /* root */
/* A positive result from new_node() means the start node is the target. */
560 if ((r = new_node(node, port, from, 0)) > 0) {
561 if (node->type != IB_NODE_SWITCH) {
562 IBWARN("ibtracert from CA to CA is unsupported");
563 return 0; /* ibtracert from host to itself is unsupported */
566 if (switch_mclookup(node, from, mlid, map) < 0 || !map[0])
/* Visit the registered nodes one distance level at a time. */
571 for (dist = 0; dist < MAXHOPS; dist++) {
573 for (node = nodesdist[dist]; node; node = node->dnext) {
577 VERBOSE("dist %d node %p", dist, node);
578 dump_endnode(ibverbose, "processing", node,
581 memset(map, 0, sizeof(map));
/* Non-switch start: only its own port is flagged, and the last hop
 * of the path is remembered as leafport. */
583 if (node->type != IB_NODE_SWITCH) {
586 leafport = path->drpath.p[path->drpath.cnt];
587 map[port->portnum] = 1;
588 node->upport = 0; /* starting here */
589 DEBUG("Starting from CA 0x%" PRIx64
590 " lid %d port %d (leafport %d)",
591 node->nodeguid, port->lid, port->portnum,
593 } else { /* switch */
595 /* if starting from a leaf port fix up port (up port) */
596 if (dist == 1 && leafport)
597 node->upport = leafport;
599 if (switch_mclookup(node, path, mlid, map) < 0) {
600 IBWARN("skipping bad Switch 0x%" PRIx64
/* Examine every port flagged in map, except the up port. */
606 for (i = 1; i <= node->numports; i++) {
607 if (!map[i] || i == node->upport)
610 if (dist == 0 && leafport) {
611 if (from->drpath.cnt > 0)
614 if (!(port = calloc(1, sizeof(Port))))
615 IBEXIT("out of memory");
617 if (get_port(port, i, path) < 0) {
619 ("can't reach node %s port %d",
620 portid2str(path), i);
625 if (port->physstate != 5) { /* LinkUP */
630 link_port(port, node);
633 if (extend_dpath(&path->drpath, i) < 0) {
639 if (!(remotenode = calloc(1, sizeof(Node))))
640 IBEXIT("out of memory");
642 if (!(remoteport = calloc(1, sizeof(Port))))
643 IBEXIT("out of memory");
645 if (get_node(remotenode, remoteport, path) < 0) {
647 ("NodeInfo on %s port %d failed, skipping port",
648 portid2str(path), i);
649 path->drpath.cnt--; /* restore path */
/* Record how the neighbour was reached so that dump_mcpath() can
 * follow upnode links and print the port numbers. */
655 remotenode->upnode = node;
656 remotenode->upport = remoteport->portnum;
657 remoteport->remoteport = port;
659 if ((r = new_node(remotenode, remoteport, path,
664 dump_endnode(ibverbose, "new remote",
665 remotenode, remoteport);
666 else if (remotenode->type == IB_NODE_SWITCH)
668 "ERR: circle discovered at",
669 remotenode, remoteport);
671 path->drpath.cnt--; /* restore path */
676 return 0; /* not found */
/* Queries the port addressed by to with get_node() and returns its port
 * GUID.  main() treats a zero result as failure.
 * NOTE(review): the warning string below ends in a newline, unlike the
 * other IBWARN() calls in this file. */
679 static uint64_t find_target_portguid(ib_portid_t * to)
684 if (get_node(&tonode, &toport, to) < 0) {
685 IBWARN("can't find to port\n");
689 return toport.portguid;
/* Prints the multicast path that ends at node.  The function calls
 * itself on node->upnode before printing node, so the path presumably
 * comes out from the root towards the target - TODO confirm the guard
 * around the recursive call.  Three kinds of lines are visible: a From
 * line, hop lines (in a short and a long layout) and a To line.  Port
 * data is taken from the first entry of node->ports and, for hop lines,
 * from its remoteport link.  Node names go through remap_node_name(). */
692 static void dump_mcpath(Node * node, int dumplevel)
694 char *nodename = NULL;
697 dump_mcpath(node->upnode, dumplevel);
700 remap_node_name(node_name_map, node->nodeguid, node->nodedesc);
703 printf("From %s 0x%" PRIx64 " port %d lid %u-%u \"%s\"\n",
705 IB_NODE_MAX ? node_type_str[node->type] : "???"),
706 node->nodeguid, node->ports->portnum, node->ports->lid,
707 node->ports->lid + (1 << node->ports->lmc) - 1,
714 printf("[%d] -> %s {0x%016" PRIx64 "}[%d]\n",
715 node->ports->remoteport->portnum,
717 IB_NODE_MAX ? node_type_str[node->type] :
718 "???"), node->nodeguid, node->upport);
720 printf("[%d] -> %s 0x%" PRIx64 "[%d] lid %u \"%s\"\n",
721 node->ports->remoteport->portnum,
723 IB_NODE_MAX ? node_type_str[node->type] :
724 "???"), node->nodeguid, node->upport,
725 node->ports->lid, nodename);
730 printf("To %s 0x%" PRIx64 " port %d lid %u-%u \"%s\"\n",
732 IB_NODE_MAX ? node_type_str[node->type] : "???"),
733 node->nodeguid, node->ports->portnum, node->ports->lid,
734 node->ports->lid + (1 << node->ports->lmc) - 1,
/* Reads PortInfo (attribute modifier 0, timeout 0) from the port
 * addressed by portid, decodes its LID and then resets portid to that
 * LID with ib_portid_set().  main() treats a negative return value as
 * failure.  The srcport parameter hides the file-scope srcport.
 * NOTE(review): main() passes NULL for srcport - TODO confirm that
 * smp_query_via() tolerates a null port handle. */
741 static int resolve_lid(ib_portid_t * portid, const void *srcport)
743 uint8_t portinfo[64] = { 0 };
746 if (!smp_query_via(portinfo, portid, IB_ATTR_PORT_INFO, 0, 0, srcport))
748 mad_decode_field(portinfo, IB_PORT_LID_F, &lid);
750 ib_portid_set(portid, lid, 0, 0);
/* Option state: dumplevel starts at 2 and is passed to find_route() and
 * dump_mcpath() by main(); mlid is passed to find_mcpath(). */
755 static int dumplevel = 2, multicast, mlid;
/* Option callback handed to ibdiag_process_opts().  Visible actions: the
 * node name map file name is duplicated with strdup(), and mlid is
 * parsed with strtoul() using base 0 (prefix decides the base).
 * NOTE(review): no check of the strtoul() result is visible here. */
757 static int process_opt(void *context, int ch, char *optarg)
761 node_name_map_file = strdup(optarg);
765 mlid = strtoul(optarg, 0, 0);
/* Program flow as far as visible here: parse the options, open the
 * local port for the SMI, directed-route SMI and SA classes, resolve
 * the source and destination addresses, then run the unicast trace
 * (find_route()) or the multicast trace (find_target_portguid(),
 * find_mcpath(), dump_mcpath()) - presumably selected by the multicast
 * flag - and finally close the node name map and the port. */
779 int main(int argc, char **argv)
781 int mgmt_classes[3] =
782 { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS };
783 ib_portid_t my_portid = { 0 };
784 ib_portid_t src_portid = { 0 };
785 ib_portid_t dest_portid = { 0 };
788 const struct ibdiag_opt opts[] = {
789 {"force", 'f', 0, NULL, "force"},
790 {"no_info", 'n', 0, NULL, "simple format"},
791 {"mlid", 'm', 1, "<mlid>", "multicast trace of the mlid"},
792 {"node-name-map", 1, 1, "<file>", "node name map file"},
795 char usage_args[] = "<src-addr> <dest-addr>";
796 const char *usage_examples[] = {
797 "- Unicast examples:",
798 "4 16\t\t\t# show path between lids 4 and 16",
799 "-n 4 16\t\t# same, but using simple output format",
800 "-G 0x8f1040396522d 0x002c9000100d051\t# use guid addresses",
802 " - Multicast examples:",
803 "-m 0xc000 4 16\t# show multicast path of mlid 0xc000 between lids 4 and 16",
807 ibdiag_process_opts(argc, argv, NULL, "DK", opts, process_opt,
808 usage_args, usage_examples);
818 timeout = ibd_timeout;
820 srcport = mad_rpc_open_port(ibd_ca, ibd_ca_port, mgmt_classes, 3);
822 IBEXIT("Failed to open '%s' port '%d'", ibd_ca, ibd_ca_port);
824 smp_mkey_set(srcport, ibd_mkey);
826 node_name_map = open_node_name_map(node_name_map_file);
/* argv[0] and argv[1] are used as the source and destination address
 * strings at this point. */
828 if (resolve_portid_str(ibd_ca, ibd_ca_port, &src_portid, argv[0],
829 ibd_dest_type, ibd_sm_id, srcport) < 0)
830 IBEXIT("can't resolve source port %s", argv[0]);
832 if (resolve_portid_str(ibd_ca, ibd_ca_port, &dest_portid, argv[1],
833 ibd_dest_type, ibd_sm_id, srcport) < 0)
834 IBEXIT("can't resolve destination port %s", argv[1]);
/* Directed-route addresses carry no LID yet, so it is read from the
 * ports.  NOTE(review): NULL is passed as the port handle here rather
 * than srcport - TODO confirm this is intended. */
836 if (ibd_dest_type == IB_DEST_DRPATH) {
837 if (resolve_lid(&src_portid, NULL) < 0)
838 IBEXIT("cannot resolve lid for port \'%s\'",
839 portid2str(&src_portid));
840 if (resolve_lid(&dest_portid, NULL) < 0)
841 IBEXIT("cannot resolve lid for port \'%s\'",
842 portid2str(&dest_portid));
845 if (dest_portid.lid == 0 || src_portid.lid == 0) {
846 IBWARN("bad src/dest lid");
850 if (ibd_dest_type != IB_DEST_DRPATH) {
851 /* first find a direct path to the src port */
852 if (find_route(&my_portid, &src_portid, 0) < 0)
853 IBEXIT("can't find a route to the src port");
/* find_route() left the discovered directed path in my_portid. */
855 src_portid = my_portid;
859 if (find_route(&src_portid, &dest_portid, dumplevel) < 0)
860 IBEXIT("can't find a route from src to dest");
864 IBWARN("invalid MLID; must be 0xc000 or larger");
867 if (!(target_portguid = find_target_portguid(&dest_portid)))
868 IBEXIT("can't reach target lid %d", dest_portid.lid);
870 if (!(endnode = find_mcpath(&src_portid, mlid)))
871 IBEXIT("can't find a multicast route from src to dest");
873 /* dump multicast path */
874 dump_mcpath(endnode, dumplevel);
876 close_node_name_map(node_name_map);
878 mad_rpc_close_port(srcport);