/*
 * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved.
 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

/*
 * Abstract:
 *    Implementation of osm_ni_rcv_t.
 * This object represents the NodeInfo Receiver object.
 * This object is part of the opensm family of objects.
 */
45 #endif /* HAVE_CONFIG_H */
49 #include <iba/ib_types.h>
50 #include <complib/cl_qmap.h>
51 #include <complib/cl_passivelock.h>
52 #include <complib/cl_debug.h>
53 #include <opensm/osm_madw.h>
54 #include <opensm/osm_log.h>
55 #include <opensm/osm_node.h>
56 #include <opensm/osm_subnet.h>
57 #include <opensm/osm_router.h>
58 #include <opensm/osm_mad_pool.h>
59 #include <opensm/osm_helper.h>
60 #include <opensm/osm_msgdef.h>
61 #include <opensm/osm_opensm.h>
62 #include <opensm/osm_ucast_mgr.h>
65 report_duplicated_guid(IN osm_sm_t * sm,
66 osm_physp_t * p_physp,
67 osm_node_t * p_neighbor_node, const uint8_t port_num)
69 osm_physp_t *p_old, *p_new;
72 p_old = p_physp->p_remote_physp;
73 p_new = osm_node_get_physp_ptr(p_neighbor_node, port_num);
75 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D01: "
76 "Found duplicated node.\n"
77 "Node 0x%" PRIx64 " port %u is reachable from remote node "
78 "0x%" PRIx64 " port %u and remote node 0x%" PRIx64 " port %u.\n"
80 cl_ntoh64(p_physp->p_node->node_info.node_guid),
82 cl_ntoh64(p_old->p_node->node_info.node_guid), p_old->port_num,
83 cl_ntoh64(p_new->p_node->node_info.node_guid), p_new->port_num);
85 osm_dump_dr_path(sm->p_log, osm_physp_get_dr_path_ptr(p_physp),
88 path = *osm_physp_get_dr_path_ptr(p_new);
89 osm_dr_path_extend(&path, port_num);
90 osm_dump_dr_path(sm->p_log, &path, OSM_LOG_ERROR);
92 osm_log(sm->p_log, OSM_LOG_SYS,
93 "FATAL: duplicated guids or 12x lane reversal\n");
96 static void requery_dup_node_info(IN osm_sm_t * sm,
97 osm_physp_t * p_physp, unsigned count)
99 osm_madw_context_t context;
103 path = *osm_physp_get_dr_path_ptr(p_physp->p_remote_physp);
104 osm_dr_path_extend(&path, p_physp->p_remote_physp->port_num);
106 context.ni_context.node_guid =
107 p_physp->p_remote_physp->p_node->node_info.port_guid;
108 context.ni_context.port_num = p_physp->p_remote_physp->port_num;
109 context.ni_context.dup_node_guid = p_physp->p_node->node_info.node_guid;
110 context.ni_context.dup_port_num = p_physp->port_num;
111 context.ni_context.dup_count = count;
113 status = osm_req_get(sm, &path, IB_MAD_ATTR_NODE_INFO,
114 0, CL_DISP_MSGID_NONE, &context);
116 if (status != IB_SUCCESS)
117 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D02: "
118 "Failure initiating NodeInfo request (%s)\n",
119 ib_get_err_str(status));
122 /**********************************************************************
123 The plock must be held before calling this function.
124 **********************************************************************/
126 __osm_ni_rcv_set_links(IN osm_sm_t * sm,
128 const uint8_t port_num,
129 const osm_ni_context_t * const p_ni_context)
131 osm_node_t *p_neighbor_node;
132 osm_physp_t *p_physp;
134 OSM_LOG_ENTER(sm->p_log);
137 A special case exists in which the node we're trying to
138 link is our own node. In this case, the guid value in
139 the ni_context will be zero.
141 if (p_ni_context->node_guid == 0) {
142 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
143 "Nothing to link for our own node 0x%" PRIx64 "\n",
144 cl_ntoh64(osm_node_get_node_guid(p_node)));
148 p_neighbor_node = osm_get_node_by_guid(sm->p_subn,
149 p_ni_context->node_guid);
150 if (!p_neighbor_node) {
151 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D10: "
152 "Unexpected removal of neighbor node "
153 "0x%" PRIx64 "\n", cl_ntoh64(p_ni_context->node_guid));
158 We have seen this neighbor node before, but we might
159 not have seen this port on the neighbor node before.
160 We should not set links to an uninitialized port on the
161 neighbor, so check validity up front. If it's not
162 valid, do nothing, since we'll see this link again
163 when we probe the neighbor.
165 if (!osm_node_link_has_valid_ports(p_node, port_num,
167 p_ni_context->port_num))
170 if (osm_node_link_exists(p_node, port_num,
171 p_neighbor_node, p_ni_context->port_num)) {
172 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Link already exists\n");
176 if (osm_node_has_any_link(p_node, port_num) &&
177 sm->p_subn->force_heavy_sweep == FALSE &&
178 (!p_ni_context->dup_count ||
179 (p_ni_context->dup_node_guid == osm_node_get_node_guid(p_node) &&
180 p_ni_context->dup_port_num == port_num))) {
183 This could be reconnected ports, but also duplicated GUID
184 (2 nodes have the same guid) or a 12x link with lane reversal
185 that is not configured correctly.
186 We will try to recover by querying NodeInfo again.
187 In order to catch even fast port moving to new location(s) and
188 back we will count up to 5.
189 Some crazy reconnections (newly created switch loop right before
190 targeted CA) will not be catched this way. So in worst case -
191 report GUID duplication and request new discovery.
192 When switch node is targeted NodeInfo querying will be done in
193 opposite order, this is much stronger check, unfortunately it is
196 p_physp = osm_node_get_physp_ptr(p_node, port_num);
197 if (p_ni_context->dup_count > 5) {
198 report_duplicated_guid(sm, p_physp,
200 p_ni_context->port_num);
201 sm->p_subn->force_heavy_sweep = TRUE;
202 } else if (p_node->sw)
203 requery_dup_node_info(sm, p_physp->p_remote_physp,
204 p_ni_context->dup_count + 1);
206 requery_dup_node_info(sm, p_physp,
207 p_ni_context->dup_count + 1);
211 When there are only two nodes with exact same guids (connected back
212 to back) - the previous check for duplicated guid will not catch
213 them. But the link will be from the port to itself...
214 Enhanced Port 0 is an exception to this
216 if ((osm_node_get_node_guid(p_node) == p_ni_context->node_guid) &&
217 (port_num == p_ni_context->port_num) &&
218 port_num != 0 && cl_qmap_count(&sm->p_subn->sw_guid_tbl) == 0) {
219 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
220 "Duplicate GUID found by link from a port to itself:"
221 "node 0x%" PRIx64 ", port number %u\n",
222 cl_ntoh64(osm_node_get_node_guid(p_node)), port_num);
223 p_physp = osm_node_get_physp_ptr(p_node, port_num);
224 osm_dump_dr_path(sm->p_log,
225 osm_physp_get_dr_path_ptr(p_physp),
228 if (sm->p_subn->opt.exit_on_fatal == TRUE) {
229 osm_log(sm->p_log, OSM_LOG_SYS,
230 "Errors on subnet. Duplicate GUID found "
231 "by link from a port to itself. "
232 "See verbose opensm.log for more details\n");
237 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
238 "Creating new link between:\n\t\t\t\tnode 0x%" PRIx64
239 ", port number %u and\n\t\t\t\tnode 0x%" PRIx64
240 ", port number %u\n",
241 cl_ntoh64(osm_node_get_node_guid(p_node)), port_num,
242 cl_ntoh64(p_ni_context->node_guid), p_ni_context->port_num);
244 if (sm->ucast_mgr.cache_valid)
245 osm_ucast_cache_check_new_link(&sm->ucast_mgr,
248 p_ni_context->port_num);
250 osm_node_link(p_node, port_num, p_neighbor_node,
251 p_ni_context->port_num);
254 OSM_LOG_EXIT(sm->p_log);
257 /**********************************************************************
258 The plock must be held before calling this function.
259 **********************************************************************/
261 __osm_ni_rcv_process_new_node(IN osm_sm_t * sm,
262 IN osm_node_t * const p_node,
263 IN const osm_madw_t * const p_madw)
265 ib_api_status_t status = IB_SUCCESS;
266 osm_madw_context_t context;
267 osm_physp_t *p_physp;
268 ib_node_info_t *p_ni;
272 OSM_LOG_ENTER(sm->p_log);
274 p_smp = osm_madw_get_smp_ptr(p_madw);
275 p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp);
276 port_num = ib_node_info_get_local_port_num(p_ni);
279 Request PortInfo & NodeDescription attributes for the port
280 that responded to the NodeInfo attribute.
281 Because this is a channel adapter or router, we are
282 not allowed to request PortInfo for the other ports.
283 Set the context union properly, so the recipient
284 knows which node & port are relevant.
286 p_physp = osm_node_get_physp_ptr(p_node, port_num);
288 context.pi_context.node_guid = p_ni->node_guid;
289 context.pi_context.port_guid = p_ni->port_guid;
290 context.pi_context.set_method = FALSE;
291 context.pi_context.light_sweep = FALSE;
292 context.pi_context.active_transition = FALSE;
294 status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp),
295 IB_MAD_ATTR_PORT_INFO,
296 cl_hton32(port_num), CL_DISP_MSGID_NONE, &context);
297 if (status != IB_SUCCESS)
298 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D02: "
299 "Failure initiating PortInfo request (%s)\n",
300 ib_get_err_str(status));
302 OSM_LOG_EXIT(sm->p_log);
305 /**********************************************************************
306 The plock must be held before calling this function.
307 **********************************************************************/
309 osm_req_get_node_desc(IN osm_sm_t * sm,
310 osm_physp_t *p_physp)
312 ib_api_status_t status = IB_SUCCESS;
313 osm_madw_context_t context;
315 OSM_LOG_ENTER(sm->p_log);
317 context.nd_context.node_guid =
318 osm_node_get_node_guid(osm_physp_get_node_ptr(p_physp));
320 status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp),
321 IB_MAD_ATTR_NODE_DESC,
322 0, CL_DISP_MSGID_NONE, &context);
323 if (status != IB_SUCCESS)
324 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D03: "
325 "Failure initiating NodeDescription request (%s)\n",
326 ib_get_err_str(status));
328 OSM_LOG_EXIT(sm->p_log);
331 /**********************************************************************
332 The plock must be held before calling this function.
333 **********************************************************************/
335 __osm_ni_rcv_get_node_desc(IN osm_sm_t * sm,
336 IN osm_node_t * const p_node,
337 IN const osm_madw_t * const p_madw)
339 ib_node_info_t *p_ni;
342 osm_physp_t *p_physp = NULL;
344 OSM_LOG_ENTER(sm->p_log);
346 p_smp = osm_madw_get_smp_ptr(p_madw);
347 p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp);
348 port_num = ib_node_info_get_local_port_num(p_ni);
351 Request PortInfo & NodeDescription attributes for the port
352 that responded to the NodeInfo attribute.
353 Because this is a channel adapter or router, we are
354 not allowed to request PortInfo for the other ports.
355 Set the context union properly, so the recipient
356 knows which node & port are relevant.
358 p_physp = osm_node_get_physp_ptr(p_node, port_num);
360 osm_req_get_node_desc(sm, p_physp);
362 OSM_LOG_EXIT(sm->p_log);
365 /**********************************************************************
366 The plock must be held before calling this function.
367 **********************************************************************/
369 __osm_ni_rcv_process_new_ca_or_router(IN osm_sm_t * sm,
370 IN osm_node_t * const p_node,
371 IN const osm_madw_t * const p_madw)
373 OSM_LOG_ENTER(sm->p_log);
375 __osm_ni_rcv_process_new_node(sm, p_node, p_madw);
378 A node guid of 0 is the corner case that indicates
379 we discovered our own node. Initialize the subnet
380 object with the SM's own port guid.
382 if (osm_madw_get_ni_context_ptr(p_madw)->node_guid == 0)
383 sm->p_subn->sm_port_guid = p_node->node_info.port_guid;
385 OSM_LOG_EXIT(sm->p_log);
388 /**********************************************************************
389 The plock must be held before calling this function.
390 **********************************************************************/
392 __osm_ni_rcv_process_existing_ca_or_router(IN osm_sm_t * sm,
393 IN osm_node_t * const p_node,
394 IN const osm_madw_t * const p_madw)
396 ib_node_info_t *p_ni;
399 osm_port_t *p_port_check;
400 osm_madw_context_t context;
402 osm_physp_t *p_physp;
403 ib_api_status_t status;
404 osm_dr_path_t *p_dr_path;
405 osm_bind_handle_t h_bind;
407 OSM_LOG_ENTER(sm->p_log);
409 p_smp = osm_madw_get_smp_ptr(p_madw);
410 p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp);
411 port_num = ib_node_info_get_local_port_num(p_ni);
412 h_bind = osm_madw_get_bind_handle(p_madw);
415 Determine if we have encountered this node through a
416 previously undiscovered port. If so, build the new
419 p_port = osm_get_port_by_guid(sm->p_subn, p_ni->port_guid);
421 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
422 "Creating new port object with GUID 0x%" PRIx64 "\n",
423 cl_ntoh64(p_ni->port_guid));
425 osm_node_init_physp(p_node, p_madw);
427 p_port = osm_port_new(p_ni, p_node);
428 if (p_port == NULL) {
429 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D04: "
430 "Unable to create new port object\n");
435 Add the new port object to the database.
438 (osm_port_t *) cl_qmap_insert(&sm->p_subn->port_guid_tbl,
441 if (p_port_check != p_port) {
443 We should never be here!
444 Somehow, this port GUID already exists in the table.
446 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D12: "
447 "Port 0x%" PRIx64 " already in the database!\n",
448 cl_ntoh64(p_ni->port_guid));
450 osm_port_delete(&p_port);
454 /* If we are a master, then this means the port is new on the subnet.
455 Mark it as new - need to send trap 64 on these ports.
456 The condition that we are master is true, since if we are in discovering
457 state (meaning we woke up from standby or we are just initializing),
458 then these ports may be new to us, but are not new on the subnet.
459 If we are master, then the subnet as we know it is the updated one,
460 and any new ports we encounter should cause trap 64. C14-72.1.1 */
461 if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER)
464 p_physp = osm_node_get_physp_ptr(p_node, port_num);
466 p_physp = osm_node_get_physp_ptr(p_node, port_num);
468 Update the DR Path to the port,
469 in case the old one is no longer available.
471 p_dr_path = osm_physp_get_dr_path_ptr(p_physp);
473 osm_dr_path_init(p_dr_path, h_bind, p_smp->hop_count,
474 p_smp->initial_path);
477 context.pi_context.node_guid = p_ni->node_guid;
478 context.pi_context.port_guid = p_ni->port_guid;
479 context.pi_context.set_method = FALSE;
480 context.pi_context.light_sweep = FALSE;
482 status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp),
483 IB_MAD_ATTR_PORT_INFO,
484 cl_hton32(port_num), CL_DISP_MSGID_NONE, &context);
486 if (status != IB_SUCCESS)
487 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D13: "
488 "Failure initiating PortInfo request (%s)\n",
489 ib_get_err_str(status));
492 OSM_LOG_EXIT(sm->p_log);
495 /**********************************************************************
496 **********************************************************************/
498 __osm_ni_rcv_process_switch(IN osm_sm_t * sm,
499 IN osm_node_t * const p_node,
500 IN const osm_madw_t * const p_madw)
502 ib_api_status_t status = IB_SUCCESS;
503 osm_madw_context_t context;
507 OSM_LOG_ENTER(sm->p_log);
509 p_smp = osm_madw_get_smp_ptr(p_madw);
511 /* update DR path of already initialized switch port 0 */
512 path = osm_physp_get_dr_path_ptr(osm_node_get_physp_ptr(p_node, 0));
513 osm_dr_path_init(path, osm_madw_get_bind_handle(p_madw),
514 p_smp->hop_count, p_smp->initial_path);
516 context.si_context.node_guid = osm_node_get_node_guid(p_node);
517 context.si_context.set_method = FALSE;
518 context.si_context.light_sweep = FALSE;
520 /* Request a SwitchInfo attribute */
521 status = osm_req_get(sm, path, IB_MAD_ATTR_SWITCH_INFO,
522 0, CL_DISP_MSGID_NONE, &context);
523 if (status != IB_SUCCESS)
524 /* continue despite error */
525 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D06: "
526 "Failure initiating SwitchInfo request (%s)\n",
527 ib_get_err_str(status));
529 OSM_LOG_EXIT(sm->p_log);
532 /**********************************************************************
533 The plock must be held before calling this function.
534 **********************************************************************/
536 __osm_ni_rcv_process_existing_switch(IN osm_sm_t * sm,
537 IN osm_node_t * const p_node,
538 IN const osm_madw_t * const p_madw)
540 OSM_LOG_ENTER(sm->p_log);
543 If this switch has already been probed during this sweep,
544 then don't bother reprobing it.
545 There is one exception - if the node has been visited, but
546 for some reason we don't have the switch object (this can happen
547 if the SwitchInfo mad didn't reach the SM) then we want
548 to retry to probe the switch.
550 if (p_node->discovery_count == 1)
551 __osm_ni_rcv_process_switch(sm, p_node, p_madw);
552 else if (!p_node->sw || p_node->sw->discovery_count == 0) {
553 /* we don't have the SwitchInfo - retry to get it */
554 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
555 "Retry to get SwitchInfo on node GUID:0x%"
556 PRIx64 "\n", cl_ntoh64(osm_node_get_node_guid(p_node)));
557 __osm_ni_rcv_process_switch(sm, p_node, p_madw);
560 OSM_LOG_EXIT(sm->p_log);
563 /**********************************************************************
564 The plock must be held before calling this function.
565 **********************************************************************/
567 __osm_ni_rcv_process_new_switch(IN osm_sm_t * sm,
568 IN osm_node_t * const p_node,
569 IN const osm_madw_t * const p_madw)
571 OSM_LOG_ENTER(sm->p_log);
573 __osm_ni_rcv_process_switch(sm, p_node, p_madw);
576 A node guid of 0 is the corner case that indicates
577 we discovered our own node. Initialize the subnet
578 object with the SM's own port guid.
580 if (osm_madw_get_ni_context_ptr(p_madw)->node_guid == 0)
581 sm->p_subn->sm_port_guid = p_node->node_info.port_guid;
583 OSM_LOG_EXIT(sm->p_log);
586 /**********************************************************************
587 The plock must NOT be held before calling this function.
588 **********************************************************************/
590 __osm_ni_rcv_process_new(IN osm_sm_t * sm,
591 IN const osm_madw_t * const p_madw)
594 osm_node_t *p_node_check;
596 osm_port_t *p_port_check;
597 osm_router_t *p_rtr = NULL;
598 osm_router_t *p_rtr_check;
599 cl_qmap_t *p_rtr_guid_tbl;
600 ib_node_info_t *p_ni;
602 osm_ni_context_t *p_ni_context;
605 OSM_LOG_ENTER(sm->p_log);
607 p_smp = osm_madw_get_smp_ptr(p_madw);
608 p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp);
609 p_ni_context = osm_madw_get_ni_context_ptr(p_madw);
610 port_num = ib_node_info_get_local_port_num(p_ni);
612 osm_dump_smp_dr_path(sm->p_log, p_smp, OSM_LOG_VERBOSE);
614 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
615 "Discovered new %s node,"
616 "\n\t\t\t\tGUID 0x%" PRIx64 ", TID 0x%" PRIx64 "\n",
617 ib_get_node_type_str(p_ni->node_type),
618 cl_ntoh64(p_ni->node_guid), cl_ntoh64(p_smp->trans_id));
620 p_node = osm_node_new(p_madw);
621 if (p_node == NULL) {
622 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D07: "
623 "Unable to create new node object\n");
628 Create a new port object to represent this node's physical
629 ports in the port table.
631 p_port = osm_port_new(p_ni, p_node);
632 if (p_port == NULL) {
633 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D14: "
634 "Unable to create new port object\n");
635 osm_node_delete(&p_node);
640 Add the new port object to the database.
643 (osm_port_t *) cl_qmap_insert(&sm->p_subn->port_guid_tbl,
644 p_ni->port_guid, &p_port->map_item);
645 if (p_port_check != p_port) {
647 We should never be here!
648 Somehow, this port GUID already exists in the table.
650 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D15: "
651 "Duplicate Port GUID 0x%" PRIx64
652 "! Found by the two directed routes:\n",
653 cl_ntoh64(p_ni->port_guid));
654 osm_dump_dr_path(sm->p_log,
655 osm_physp_get_dr_path_ptr(p_port->p_physp),
657 osm_dump_dr_path(sm->p_log,
658 osm_physp_get_dr_path_ptr(p_port_check->
661 osm_port_delete(&p_port);
662 osm_node_delete(&p_node);
666 /* If we are a master, then this means the port is new on the subnet.
667 Mark it as new - need to send trap 64 on these ports.
668 The condition that we are master is true, since if we are in discovering
669 state (meaning we woke up from standby or we are just initializing),
670 then these ports may be new to us, but are not new on the subnet.
671 If we are master, then the subnet as we know it is the updated one,
672 and any new ports we encounter should cause trap 64. C14-72.1.1 */
673 if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER)
676 /* If there were RouterInfo or other router attribute,
677 this would be elsewhere */
678 if (p_ni->node_type == IB_NODE_TYPE_ROUTER) {
679 if ((p_rtr = osm_router_new(p_port)) == NULL)
680 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1A: "
681 "Unable to create new router object\n");
683 p_rtr_guid_tbl = &sm->p_subn->rtr_guid_tbl;
685 (osm_router_t *) cl_qmap_insert(p_rtr_guid_tbl,
688 if (p_rtr_check != p_rtr)
689 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1B: "
690 "Unable to add port GUID:0x%016" PRIx64
691 " to router table\n",
692 cl_ntoh64(p_ni->port_guid));
697 (osm_node_t *) cl_qmap_insert(&sm->p_subn->node_guid_tbl,
698 p_ni->node_guid, &p_node->map_item);
699 if (p_node_check != p_node) {
701 This node must have been inserted by another thread.
702 This is unexpected, but is not an error.
703 We can simply clean-up, since the other thread will
704 see this processing through to completion.
706 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
707 "Discovery race detected at node 0x%" PRIx64 "\n",
708 cl_ntoh64(p_ni->node_guid));
709 osm_node_delete(&p_node);
710 p_node = p_node_check;
711 __osm_ni_rcv_set_links(sm, p_node, port_num, p_ni_context);
714 __osm_ni_rcv_set_links(sm, p_node, port_num, p_ni_context);
716 p_node->discovery_count++;
717 __osm_ni_rcv_get_node_desc(sm, p_node, p_madw);
719 switch (p_ni->node_type) {
720 case IB_NODE_TYPE_CA:
721 case IB_NODE_TYPE_ROUTER:
722 __osm_ni_rcv_process_new_ca_or_router(sm, p_node, p_madw);
724 case IB_NODE_TYPE_SWITCH:
725 __osm_ni_rcv_process_new_switch(sm, p_node, p_madw);
728 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D16: "
729 "Unknown node type %u with GUID 0x%" PRIx64 "\n",
730 p_ni->node_type, cl_ntoh64(p_ni->node_guid));
735 OSM_LOG_EXIT(sm->p_log);
738 /**********************************************************************
739 The plock must be held before calling this function.
740 **********************************************************************/
742 __osm_ni_rcv_process_existing(IN osm_sm_t * sm,
743 IN osm_node_t * const p_node,
744 IN const osm_madw_t * const p_madw)
746 ib_node_info_t *p_ni;
748 osm_ni_context_t *p_ni_context;
751 OSM_LOG_ENTER(sm->p_log);
753 p_smp = osm_madw_get_smp_ptr(p_madw);
754 p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp);
755 p_ni_context = osm_madw_get_ni_context_ptr(p_madw);
756 port_num = ib_node_info_get_local_port_num(p_ni);
758 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
759 "Rediscovered %s node 0x%" PRIx64 " TID 0x%" PRIx64
760 ", discovered %u times already\n",
761 ib_get_node_type_str(p_ni->node_type),
762 cl_ntoh64(p_ni->node_guid),
763 cl_ntoh64(p_smp->trans_id), p_node->discovery_count);
766 If we haven't already encountered this existing node
767 on this particular sweep, then process further.
769 p_node->discovery_count++;
771 switch (p_ni->node_type) {
772 case IB_NODE_TYPE_CA:
773 case IB_NODE_TYPE_ROUTER:
774 __osm_ni_rcv_process_existing_ca_or_router(sm, p_node,
778 case IB_NODE_TYPE_SWITCH:
779 __osm_ni_rcv_process_existing_switch(sm, p_node, p_madw);
783 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D09: "
784 "Unknown node type %u with GUID 0x%" PRIx64 "\n",
785 p_ni->node_type, cl_ntoh64(p_ni->node_guid));
789 __osm_ni_rcv_set_links(sm, p_node, port_num, p_ni_context);
791 OSM_LOG_EXIT(sm->p_log);
794 /**********************************************************************
795 **********************************************************************/
796 void osm_ni_rcv_process(IN void *context, IN void *data)
798 osm_sm_t *sm = context;
799 osm_madw_t *p_madw = data;
800 ib_node_info_t *p_ni;
806 OSM_LOG_ENTER(sm->p_log);
810 p_smp = osm_madw_get_smp_ptr(p_madw);
811 p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp);
813 CL_ASSERT(p_smp->attr_id == IB_MAD_ATTR_NODE_INFO);
815 if (p_ni->node_guid == 0) {
816 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D16: "
817 "Got Zero Node GUID! Found on the directed route:\n");
818 osm_dump_smp_dr_path(sm->p_log, p_smp, OSM_LOG_ERROR);
822 if (p_ni->port_guid == 0) {
823 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D17: "
824 "Got Zero Port GUID! Found on the directed route:\n");
825 osm_dump_smp_dr_path(sm->p_log, p_smp, OSM_LOG_ERROR);
830 Determine if this node has already been discovered,
831 and process accordingly.
832 During processing of this node, hold the shared lock.
835 CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
836 p_node = osm_get_node_by_guid(sm->p_subn, p_ni->node_guid);
838 osm_dump_node_info(sm->p_log, p_ni, OSM_LOG_DEBUG);
841 __osm_ni_rcv_process_new(sm, p_madw);
843 __osm_ni_rcv_process_existing(sm, p_node, p_madw);
845 CL_PLOCK_RELEASE(sm->p_lock);
848 OSM_LOG_EXIT(sm->p_log);