2 * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved.
4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5 * Copyright (c) 2009 HNR Consulting. All rights reserved.
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
39 * Implementation of osm_ni_rcv_t.
40 * This object represents the NodeInfo Receiver object.
41 * This object is part of the opensm family of objects.
46 #endif /* HAVE_CONFIG_H */
50 #include <iba/ib_types.h>
51 #include <complib/cl_qmap.h>
52 #include <complib/cl_passivelock.h>
53 #include <complib/cl_debug.h>
54 #include <opensm/osm_file_ids.h>
55 #define FILE_ID OSM_FILE_NODE_INFO_RCV_C
56 #include <opensm/osm_madw.h>
57 #include <opensm/osm_log.h>
58 #include <opensm/osm_node.h>
59 #include <opensm/osm_subnet.h>
60 #include <opensm/osm_router.h>
61 #include <opensm/osm_mad_pool.h>
62 #include <opensm/osm_helper.h>
63 #include <opensm/osm_msgdef.h>
64 #include <opensm/osm_opensm.h>
65 #include <opensm/osm_ucast_mgr.h>
66 #include <opensm/osm_db_pack.h>
/*
 * Log ERR 0D01 for a node GUID that is reachable through two different
 * remote ports, then dump the directed-route (DR) paths involved.
 *
 * sm              - SM object; only sm->p_log is used here.
 * p_physp         - local physical port; its current p_remote_physp is the
 *                   previously recorded neighbor (p_old).
 * p_neighbor_node - node on which the newly reported neighbor port (p_new)
 *                   is looked up.
 * port_num        - port number on p_neighbor_node; also used to extend the
 *                   copy of the DR path of p_new.
 *
 * NOTE(review): this listing carries embedded line numbers and appears to
 * omit short source lines (braces, the declaration of path, and some of the
 * OSM_LOG arguments), so it is not compilable as shown.
 * NOTE(review): p_new is NULL-checked while building the log arguments but
 * is then passed unconditionally to osm_physp_get_dr_path_ptr() and the
 * result is dereferenced -- confirm a NULL p_new cannot reach that line.
 */
68 static void report_duplicated_guid(IN osm_sm_t * sm, osm_physp_t * p_physp,
69 osm_node_t * p_neighbor_node,
70 const uint8_t port_num)
72 osm_physp_t *p_old, *p_new;
/* p_old: neighbor already recorded; p_new: neighbor just reported */
75 p_old = p_physp->p_remote_physp;
76 p_new = osm_node_get_physp_ptr(p_neighbor_node, port_num);
78 OSM_LOG(sm->p_log, OSM_LOG_SYS | OSM_LOG_ERROR, "ERR 0D01: "
79 "Found duplicated node GUID.\n"
80 "Node 0x%" PRIx64 " port %u is reachable from remote node "
81 "0x%" PRIx64 " port %u and remote node 0x%" PRIx64 " port %u.\n"
83 cl_ntoh64(p_physp->p_node->node_info.node_guid),
85 p_old ? cl_ntoh64(p_old->p_node->node_info.node_guid) : 0,
86 p_old ? p_old->port_num : 0,
87 p_new ? cl_ntoh64(p_new->p_node->node_info.node_guid) : 0,
88 p_new ? p_new->port_num : 0);
90 osm_dump_dr_path_v2(sm->p_log, osm_physp_get_dr_path_ptr(p_physp),
91 FILE_ID, OSM_LOG_ERROR);
/* Work on a copy of the DR path of p_new, extended by port_num */
93 path = *osm_physp_get_dr_path_ptr(p_new);
94 if (osm_dr_path_extend(&path, port_num))
95 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D05: "
96 "DR path with hop count %d couldn't be extended\n",
98 osm_dump_dr_path_v2(sm->p_log, &path, FILE_ID, OSM_LOG_ERROR);
/*
 * Re-send Get(NodeInfo) towards p_physp in order to re-check a suspected
 * duplicated GUID.
 *
 * The request travels along a copy of the DR path of the remote port of
 * p_physp, extended by that remote port number. The MAD context records the
 * remote port as the neighbor (node_guid, port_num), the node GUID and port
 * number of p_physp as dup_node_guid / dup_port_num, and count as dup_count.
 *
 * Errors are only logged: ERR 0D0D when p_physp has no remote physp,
 * ERR 0D08 when the DR path cannot be extended, ERR 0D02 when osm_req_get()
 * fails.
 *
 * NOTE(review): the line declaring the count parameter, the local
 * declarations and the statements that leave the function after the first
 * two errors are not visible in this listing.
 * NOTE(review): ni_context.node_guid is filled from node_info.port_guid,
 * while ni_rcv_set_links() passes ni_context.node_guid to
 * osm_get_node_by_guid() -- confirm that port_guid is intended here.
 */
101 static void requery_dup_node_info(IN osm_sm_t * sm, osm_physp_t * p_physp,
104 osm_madw_context_t context;
108 if (!p_physp->p_remote_physp) {
109 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0D: "
110 "DR path couldn't be extended due to NULL remote physp\n");
114 path = *osm_physp_get_dr_path_ptr(p_physp->p_remote_physp);
115 if (osm_dr_path_extend(&path, p_physp->p_remote_physp->port_num)) {
116 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D08: "
117 "DR path with hop count %d couldn't be extended\n",
/* Neighbor fields come from the remote port; dup_* fields identify p_physp */
122 context.ni_context.node_guid =
123 p_physp->p_remote_physp->p_node->node_info.port_guid;
124 context.ni_context.port_num = p_physp->p_remote_physp->port_num;
125 context.ni_context.dup_node_guid = p_physp->p_node->node_info.node_guid;
126 context.ni_context.dup_port_num = p_physp->port_num;
127 context.ni_context.dup_count = count;
129 status = osm_req_get(sm, &path, IB_MAD_ATTR_NODE_INFO, 0,
130 TRUE, 0, CL_DISP_MSGID_NONE, &context);
132 if (status != IB_SUCCESS)
133 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D02: "
134 "Failure initiating NodeInfo request (%s)\n",
135 ib_get_err_str(status));
138 /**********************************************************************
139 The plock must be held before calling this function.
140 **********************************************************************/
/*
 * Record the link between port port_num of p_node and the neighbor port
 * described by p_ni_context (node_guid / port_num of the port through which
 * the NodeInfo request was sent).
 *
 * Cases handled, in the order they appear below:
 *  - p_ni_context->node_guid == 0: the node is the SM's own node and there
 *    is nothing to link.
 *  - The neighbor node cannot be found in the subnet (ERR 0D10).
 *  - The link already exists.
 *  - The local port reports IB_LINK_DOWN although it was already marked as
 *    discovered: physp_discovered[port_num] is cleared and a heavy sweep is
 *    forced.
 *  - The port already has some link and no heavy sweep is pending: treated
 *    as a possible duplicated GUID. While dup_count is at most 5, NodeInfo
 *    is re-queried with dup_count + 1 (through the remote physp when p_node
 *    is a switch, through p_physp itself in the other call); once dup_count
 *    exceeds 5 the duplication is reported and a heavy sweep is forced.
 *  - The link would connect a non-zero port to itself while the switch
 *    table is empty: logged as a duplicate GUID, with an extra SYS log when
 *    the exit_on_fatal option is set.
 *  - Otherwise the link is created with osm_node_link() and written to the
 *    neighbor database in both directions.
 *
 * NOTE(review): the statements that leave the function after each of the
 * early cases, and several if-lines guarding the physp lookups, are not
 * visible in this listing; confirm the exact control flow against the full
 * source.
 * NOTE(review): ERR OD0E, ERR OD0F and ERR OD1D are spelled with the letter
 * O, unlike the 0Dxx codes used elsewhere in this file -- presumably typos.
 */
141 static void ni_rcv_set_links(IN osm_sm_t * sm, osm_node_t * p_node,
142 const uint8_t port_num,
143 const osm_ni_context_t * p_ni_context)
145 osm_node_t *p_neighbor_node;
146 osm_physp_t *p_physp, *p_remote_physp;
148 OSM_LOG_ENTER(sm->p_log);
151 A special case exists in which the node we're trying to
152 link is our own node. In this case, the guid value in
153 the ni_context will be zero.
155 if (p_ni_context->node_guid == 0) {
156 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
157 "Nothing to link for our own node 0x%" PRIx64 "\n",
158 cl_ntoh64(osm_node_get_node_guid(p_node)));
162 p_neighbor_node = osm_get_node_by_guid(sm->p_subn,
163 p_ni_context->node_guid);
164 if (PF(!p_neighbor_node)) {
165 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D10: "
166 "Unexpected removal of neighbor node 0x%" PRIx64 "\n",
167 cl_ntoh64(p_ni_context->node_guid));
171 /* When setting the link, ports on both
172 sides of the link should be initialized */
173 CL_ASSERT(osm_node_link_has_valid_ports(p_node, port_num,
175 p_ni_context->port_num));
177 if (osm_node_link_exists(p_node, port_num,
178 p_neighbor_node, p_ni_context->port_num)) {
179 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Link already exists\n");
183 p_physp = osm_node_get_physp_ptr(p_node, port_num);
185 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD0E: "
186 "Failed to find physp for port %d of Node GUID 0x%"
187 PRIx64 "\n", port_num,
188 cl_ntoh64(osm_node_get_node_guid(p_node)));
193 * If the link went UP, after we already discovered it, we shouldn't
194 * set the link between the ports and resweep.
196 if (osm_physp_get_port_state(p_physp) == IB_LINK_DOWN &&
197 p_node->physp_discovered[port_num]) {
198 /* Link down on another side. Don't create a link*/
199 p_node->physp_discovered[port_num] = 0;
200 sm->p_subn->force_heavy_sweep = TRUE;
204 if (osm_node_has_any_link(p_node, port_num) &&
205 sm->p_subn->force_heavy_sweep == FALSE &&
206 (!p_ni_context->dup_count ||
207 (p_ni_context->dup_node_guid == osm_node_get_node_guid(p_node) &&
208 p_ni_context->dup_port_num == port_num))) {
211 This could be reconnected ports, but also duplicated GUID
212 (2 nodes have the same guid) or a 12x link with lane reversal
213 that is not configured correctly.
214 We will try to recover by querying NodeInfo again.
215 In order to catch even fast port moving to new location(s)
216 and back we will count up to 5.
217 Some crazy reconnections (newly created switch loop right
218 before targeted CA) will not be catched this way. So in worst
219 case - report GUID duplication and request new discovery.
220 When switch node is targeted NodeInfo querying will be done
221 in opposite order, this is much stronger check, unfortunately
222 it is impossible with CAs.
224 p_physp = osm_node_get_physp_ptr(p_node, port_num);
226 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD0F: "
227 "Failed to find physp for port %d of Node GUID 0x%"
228 PRIx64 "\n", port_num,
229 cl_ntoh64(osm_node_get_node_guid(p_node)));
233 if (p_ni_context->dup_count > 5) {
234 report_duplicated_guid(sm, p_physp, p_neighbor_node,
235 p_ni_context->port_num);
236 sm->p_subn->force_heavy_sweep = TRUE;
237 } else if (p_node->sw)
238 requery_dup_node_info(sm, p_physp->p_remote_physp,
239 p_ni_context->dup_count + 1);
241 requery_dup_node_info(sm, p_physp,
242 p_ni_context->dup_count + 1);
246 When there are only two nodes with exact same guids (connected back
247 to back) - the previous check for duplicated guid will not catch
248 them. But the link will be from the port to itself...
249 Enhanced Port 0 is an exception to this
251 if (osm_node_get_node_guid(p_node) == p_ni_context->node_guid &&
252 port_num == p_ni_context->port_num &&
253 port_num != 0 && cl_qmap_count(&sm->p_subn->sw_guid_tbl) == 0) {
254 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
255 "Duplicate GUID found by link from a port to itself:"
256 "node 0x%" PRIx64 ", port number %u\n",
257 cl_ntoh64(osm_node_get_node_guid(p_node)), port_num);
258 p_physp = osm_node_get_physp_ptr(p_node, port_num);
260 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD1D: "
261 "Failed to find physp for port %d of Node GUID 0x%"
262 PRIx64 "\n", port_num,
263 cl_ntoh64(osm_node_get_node_guid(p_node)));
267 osm_dump_dr_path_v2(sm->p_log, osm_physp_get_dr_path_ptr(p_physp),
268 FILE_ID, OSM_LOG_VERBOSE);
270 if (sm->p_subn->opt.exit_on_fatal == TRUE) {
271 osm_log_v2(sm->p_log, OSM_LOG_SYS, FILE_ID,
272 "Errors on subnet. Duplicate GUID found "
273 "by link from a port to itself. "
274 "See verbose opensm.log for more details\n");
279 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
280 "Creating new link between:\n\t\t\t\tnode 0x%" PRIx64
281 ", port number %u and\n\t\t\t\tnode 0x%" PRIx64
282 ", port number %u\n",
283 cl_ntoh64(osm_node_get_node_guid(p_node)), port_num,
284 cl_ntoh64(p_ni_context->node_guid), p_ni_context->port_num);
286 if (sm->ucast_mgr.cache_valid)
287 osm_ucast_cache_check_new_link(&sm->ucast_mgr, p_node, port_num,
289 p_ni_context->port_num);
291 p_physp = osm_node_get_physp_ptr(p_node, port_num);
292 p_remote_physp = osm_node_get_physp_ptr(p_neighbor_node,
293 p_ni_context->port_num);
294 if (!p_physp || !p_remote_physp)
297 osm_node_link(p_node, port_num, p_neighbor_node, p_ni_context->port_num);
299 osm_db_neighbor_set(sm->p_subn->p_neighbor,
300 cl_ntoh64(osm_physp_get_port_guid(p_physp)),
302 cl_ntoh64(osm_physp_get_port_guid(p_remote_physp)),
303 p_ni_context->port_num);
304 osm_db_neighbor_set(sm->p_subn->p_neighbor,
305 cl_ntoh64(osm_physp_get_port_guid(p_remote_physp)),
306 p_ni_context->port_num,
307 cl_ntoh64(osm_physp_get_port_guid(p_physp)),
311 OSM_LOG_EXIT(sm->p_log);
/*
 * Request PortInfo for the port that answered the NodeInfo query.
 *
 * The local port number is read from the NodeInfo payload of madw. The
 * Get(PortInfo) is sent along the DR path stored in the physp of that port,
 * with a pi_context naming the node GUID and port GUID and with the
 * set_method, light_sweep, active_transition and client_rereg flags FALSE.
 * When the fdr10 option is set and is_mlnx_ext_port_info_supported()
 * returns non-zero, a Get(MLNX ExtendedPortInfo) is issued as well, reusing
 * the same context. Failures to initiate either request are only logged.
 *
 * NOTE(review): some local declarations, the physp NULL test and a few
 * argument lines are not visible in this listing.
 * NOTE(review): ERR OD1E and ERR OD02 are spelled with the letter O; read as
 * 0D02 the latter would coincide with the code logged by
 * requery_dup_node_info() -- presumably typos, confirm the intended codes.
 */
314 static void ni_rcv_get_port_info(IN osm_sm_t * sm, IN osm_node_t * node,
315 IN const osm_madw_t * madw)
317 osm_madw_context_t context;
321 ib_api_status_t status;
322 int mlnx_epi_supported = 0;
324 ni = ib_smp_get_payload_ptr(osm_madw_get_smp_ptr(madw));
326 port = ib_node_info_get_local_port_num(ni);
328 if (sm->p_subn->opt.fdr10)
329 mlnx_epi_supported = is_mlnx_ext_port_info_supported(
330 ib_node_info_get_vendor_id(ni),
333 physp = osm_node_get_physp_ptr(node, port);
335 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD1E: "
336 "Failed to find physp for port %d of Node GUID 0x%"
338 cl_ntoh64(osm_node_get_node_guid(node)));
342 context.pi_context.node_guid = osm_node_get_node_guid(node);
343 context.pi_context.port_guid = osm_physp_get_port_guid(physp);
344 context.pi_context.set_method = FALSE;
345 context.pi_context.light_sweep = FALSE;
346 context.pi_context.active_transition = FALSE;
347 context.pi_context.client_rereg = FALSE;
349 status = osm_req_get(sm, osm_physp_get_dr_path_ptr(physp),
350 IB_MAD_ATTR_PORT_INFO, cl_hton32(port),
351 TRUE, 0, CL_DISP_MSGID_NONE, &context);
352 if (status != IB_SUCCESS)
353 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD02: "
354 "Failure initiating PortInfo request (%s)\n",
355 ib_get_err_str(status));
356 if (mlnx_epi_supported) {
357 status = osm_req_get(sm,
358 osm_physp_get_dr_path_ptr(physp),
359 IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO,
361 TRUE, 0, CL_DISP_MSGID_NONE, &context);
362 if (status != IB_SUCCESS)
363 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0B: "
364 "Failure initiating MLNX ExtPortInfo request (%s)\n",
365 ib_get_err_str(status));
369 /**********************************************************************
370 The plock must be held before calling this function.
371 **********************************************************************/
372 void osm_req_get_node_desc(IN osm_sm_t * sm, osm_physp_t * p_physp)
374 ib_api_status_t status = IB_SUCCESS;
375 osm_madw_context_t context;
377 OSM_LOG_ENTER(sm->p_log);
379 context.nd_context.node_guid =
380 osm_node_get_node_guid(osm_physp_get_node_ptr(p_physp));
382 status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp),
383 IB_MAD_ATTR_NODE_DESC, 0, TRUE, 0,
384 CL_DISP_MSGID_NONE, &context);
385 if (status != IB_SUCCESS)
386 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D03: "
387 "Failure initiating NodeDescription request (%s)\n",
388 ib_get_err_str(status));
390 OSM_LOG_EXIT(sm->p_log);
393 /**********************************************************************
394 The plock must be held before calling this function.
395 **********************************************************************/
/*
 * Request NodeDescription through the port that responded to NodeInfo.
 *
 * The local port number is taken from the NodeInfo payload of p_madw, the
 * matching physp of p_node is looked up and handed to
 * osm_req_get_node_desc(). A missing physp is logged as ERR OD1F.
 *
 * NOTE(review): the physp NULL test and the statement that leaves the
 * function after the error log are not visible in this listing.
 * NOTE(review): ERR OD1F is spelled with the letter O, unlike the 0Dxx codes
 * used elsewhere in this file -- presumably a typo.
 */
396 static void ni_rcv_get_node_desc(IN osm_sm_t * sm, IN osm_node_t * p_node,
397 IN const osm_madw_t * p_madw)
399 ib_node_info_t *p_ni;
402 osm_physp_t *p_physp = NULL;
404 OSM_LOG_ENTER(sm->p_log);
406 p_smp = osm_madw_get_smp_ptr(p_madw);
407 p_ni = ib_smp_get_payload_ptr(p_smp);
408 port_num = ib_node_info_get_local_port_num(p_ni);
411 Request PortInfo & NodeDescription attributes for the port
412 that responded to the NodeInfo attribute.
413 Because this is a channel adapter or router, we are
414 not allowed to request PortInfo for the other ports.
415 Set the context union properly, so the recipient
416 knows which node & port are relevant.
418 p_physp = osm_node_get_physp_ptr(p_node, port_num);
420 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD1F: "
421 "Failed to find physp for port %d of Node GUID 0x%"
422 PRIx64 "\n", port_num,
423 cl_ntoh64(osm_node_get_node_guid(p_node)));
427 osm_req_get_node_desc(sm, p_physp);
429 OSM_LOG_EXIT(sm->p_log);
432 /**********************************************************************
433 The plock must be held before calling this function.
434 **********************************************************************/
435 static void ni_rcv_process_new_ca_or_router(IN osm_sm_t * sm,
436 IN osm_node_t * p_node,
437 IN const osm_madw_t * p_madw)
439 OSM_LOG_ENTER(sm->p_log);
441 ni_rcv_get_port_info(sm, p_node, p_madw);
444 A node guid of 0 is the corner case that indicates
445 we discovered our own node. Initialize the subnet
446 object with the SM's own port guid.
448 if (osm_madw_get_ni_context_ptr(p_madw)->node_guid == 0)
449 sm->p_subn->sm_port_guid = p_node->node_info.port_guid;
451 OSM_LOG_EXIT(sm->p_log);
454 /**********************************************************************
455 The plock must be held before calling this function.
456 **********************************************************************/
/*
 * Handle NodeInfo received from a CA or router node that is already known.
 *
 * The port GUID from the NodeInfo payload is looked up in the subnet.
 * For a port GUID seen for the first time the code below initialises the
 * physp from the MAD, creates an osm_port_t and inserts it into
 * port_guid_tbl, then creates an alias GUID entry and inserts it into
 * alias_port_guid_tbl. Allocation failures are logged (ERR 0D04, 0D11);
 * insertion conflicts are logged (ERR 0D12, 0D13) and the freshly created
 * objects are deleted.
 * For a known port the physp is looked up (ERR 0D1C when absent) and its DR
 * path is re-initialised from the hop count and initial path of the
 * received SMP.
 * PortInfo is requested through ni_rcv_get_port_info() near the end.
 *
 * NOTE(review): the if/else lines separating the two cases, the statements
 * executed when the SM is master, and the early exits are not visible in
 * this listing; confirm the branch structure against the full source.
 */
457 static void ni_rcv_process_existing_ca_or_router(IN osm_sm_t * sm,
458 IN osm_node_t * p_node,
459 IN const osm_madw_t * p_madw)
461 ib_node_info_t *p_ni;
464 osm_port_t *p_port_check;
466 osm_dr_path_t *p_dr_path;
467 osm_alias_guid_t *p_alias_guid, *p_alias_guid_check;
469 OSM_LOG_ENTER(sm->p_log);
471 p_smp = osm_madw_get_smp_ptr(p_madw);
472 p_ni = ib_smp_get_payload_ptr(p_smp);
473 port_num = ib_node_info_get_local_port_num(p_ni);
476 Determine if we have encountered this node through a
477 previously undiscovered port. If so, build the new
480 p_port = osm_get_port_by_guid(sm->p_subn, p_ni->port_guid);
482 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
483 "Creating new port object with GUID 0x%" PRIx64 "\n",
484 cl_ntoh64(p_ni->port_guid));
486 osm_node_init_physp(p_node, port_num, p_madw);
488 p_port = osm_port_new(p_ni, p_node);
489 if (PF(p_port == NULL)) {
490 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D04: "
491 "Unable to create new port object\n");
496 Add the new port object to the database.
499 (osm_port_t *) cl_qmap_insert(&sm->p_subn->port_guid_tbl,
502 if (PF(p_port_check != p_port)) {
504 We should never be here!
505 Somehow, this port GUID already exists in the table.
507 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D12: "
508 "Port 0x%" PRIx64 " already in the database!\n",
509 cl_ntoh64(p_ni->port_guid));
511 osm_port_delete(&p_port);
515 p_alias_guid = osm_alias_guid_new(p_ni->port_guid,
517 if (PF(!p_alias_guid)) {
518 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D11: "
519 "alias guid memory allocation failed"
520 " for port GUID 0x%" PRIx64 "\n",
521 cl_ntoh64(p_ni->port_guid));
525 /* insert into alias guid table */
527 (osm_alias_guid_t *) cl_qmap_insert(&sm->p_subn->alias_port_guid_tbl,
528 p_alias_guid->alias_guid,
529 &p_alias_guid->map_item);
530 if (p_alias_guid_check != p_alias_guid) {
531 /* alias GUID is a duplicate */
532 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D13: "
533 "Duplicate alias port GUID 0x%" PRIx64 "\n",
534 cl_ntoh64(p_ni->port_guid));
535 osm_alias_guid_delete(&p_alias_guid);
536 osm_port_delete(&p_port);
541 /* If we are a master, then this means the port is new on the subnet.
542 Mark it as new - need to send trap 64 for these ports.
543 The condition that we are master is true, since if we are in discovering
544 state (meaning we woke up from standby or we are just initializing),
545 then these ports may be new to us, but are not new on the subnet.
546 If we are master, then the subnet as we know it is the updated one,
547 and any new ports we encounter should cause trap 64. C14-72.1.1 */
548 if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER)
552 osm_physp_t *p_physp = osm_node_get_physp_ptr(p_node, port_num);
554 if (PF(p_physp == NULL)) {
555 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1C: "
556 "No physical port found for node GUID 0x%"
557 PRIx64 " port %u. Might be duplicate port GUID\n",
558 cl_ntoh64(p_node->node_info.node_guid),
564 Update the DR Path to the port,
565 in case the old one is no longer available.
567 p_dr_path = osm_physp_get_dr_path_ptr(p_physp);
569 osm_dr_path_init(p_dr_path, p_smp->hop_count,
570 p_smp->initial_path);
573 ni_rcv_get_port_info(sm, p_node, p_madw);
576 OSM_LOG_EXIT(sm->p_log);
579 static void ni_rcv_process_switch(IN osm_sm_t * sm, IN osm_node_t * p_node,
580 IN const osm_madw_t * p_madw)
582 ib_api_status_t status = IB_SUCCESS;
583 osm_physp_t *p_physp;
584 osm_madw_context_t context;
588 OSM_LOG_ENTER(sm->p_log);
590 p_smp = osm_madw_get_smp_ptr(p_madw);
592 p_physp = osm_node_get_physp_ptr(p_node, 0);
593 /* update DR path of already initialized switch port 0 */
594 path = osm_physp_get_dr_path_ptr(p_physp);
595 osm_dr_path_init(path, p_smp->hop_count, p_smp->initial_path);
597 context.si_context.node_guid = osm_node_get_node_guid(p_node);
598 context.si_context.set_method = FALSE;
599 context.si_context.light_sweep = FALSE;
600 context.si_context.lft_top_change = FALSE;
602 /* Request a SwitchInfo attribute */
603 status = osm_req_get(sm, path, IB_MAD_ATTR_SWITCH_INFO, 0, TRUE, 0,
604 CL_DISP_MSGID_NONE, &context);
605 if (status != IB_SUCCESS)
606 /* continue despite error */
607 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D06: "
608 "Failure initiating SwitchInfo request (%s)\n",
609 ib_get_err_str(status));
611 OSM_LOG_EXIT(sm->p_log);
614 /**********************************************************************
615 The plock must be held before calling this function.
616 **********************************************************************/
617 static void ni_rcv_process_existing_switch(IN osm_sm_t * sm,
618 IN osm_node_t * p_node,
619 IN const osm_madw_t * p_madw)
621 OSM_LOG_ENTER(sm->p_log);
624 If this switch has already been probed during this sweep,
625 then don't bother reprobing it.
627 if (p_node->discovery_count == 1)
628 ni_rcv_process_switch(sm, p_node, p_madw);
630 OSM_LOG_EXIT(sm->p_log);
633 /**********************************************************************
634 The plock must be held before calling this function.
635 **********************************************************************/
636 static void ni_rcv_process_new_switch(IN osm_sm_t * sm, IN osm_node_t * p_node,
637 IN const osm_madw_t * p_madw)
639 OSM_LOG_ENTER(sm->p_log);
641 ni_rcv_process_switch(sm, p_node, p_madw);
644 A node guid of 0 is the corner case that indicates
645 we discovered our own node. Initialize the subnet
646 object with the SM's own port guid.
648 if (osm_madw_get_ni_context_ptr(p_madw)->node_guid == 0)
649 sm->p_subn->sm_port_guid = p_node->node_info.port_guid;
651 OSM_LOG_EXIT(sm->p_log);
654 /**********************************************************************
655 The plock must be held before calling this function.
656 **********************************************************************/
/*
 * Handle NodeInfo received from a node that is not yet in the subnet.
 *
 * Steps visible below:
 *  - Reject the response when the local port number exceeds num_ports
 *    (ERR 0D0A).
 *  - Create the node object (ERR 0D07) and a port object for the reporting
 *    port (ERR 0D14, node deleted on failure).
 *  - Insert the port into port_guid_tbl; if the GUID is already present,
 *    log ERR 0D15, dump both DR paths and delete the new port and node.
 *  - Create an alias GUID entry (ERR 0D18) and insert it into
 *    alias_port_guid_tbl; a duplicate is logged as ERR 0D19 and deleted.
 *  - For routers, create a router object (ERR 0D1A) and insert it into
 *    rtr_guid_tbl (ERR 0D1B on conflict).
 *  - Insert the node into node_guid_tbl. If another entry already exists,
 *    the new node is deleted, the existing one is used and only the links
 *    are set. Otherwise links are set, discovery_count is incremented,
 *    NodeDescription is requested and processing continues according to
 *    the node type (ERR 0D16 for an unknown type).
 *
 * The only caller visible in this file, osm_ni_rcv_process(), invokes this
 * function while holding sm->p_lock exclusively.
 *
 * NOTE(review): several declarations, early exits, else/break lines and the
 * statement executed when the SM is master are not visible in this listing.
 * NOTE(review): in the ERR 0D0A message there is no space between the GUID
 * conversion and the word is, so the two run together in the log output.
 * NOTE(review): ERR 0D16 is also logged by osm_ni_rcv_process() for a zero
 * node GUID -- confirm whether the codes are meant to be distinct.
 */
657 static void ni_rcv_process_new(IN osm_sm_t * sm, IN const osm_madw_t * p_madw)
660 osm_node_t *p_node_check;
662 osm_port_t *p_port_check;
663 osm_router_t *p_rtr = NULL;
664 osm_router_t *p_rtr_check;
665 cl_qmap_t *p_rtr_guid_tbl;
666 ib_node_info_t *p_ni;
668 osm_ni_context_t *p_ni_context;
669 osm_alias_guid_t *p_alias_guid, *p_alias_guid_check;
672 OSM_LOG_ENTER(sm->p_log);
674 p_smp = osm_madw_get_smp_ptr(p_madw);
675 p_ni = ib_smp_get_payload_ptr(p_smp);
676 p_ni_context = osm_madw_get_ni_context_ptr(p_madw);
677 port_num = ib_node_info_get_local_port_num(p_ni);
679 osm_dump_smp_dr_path_v2(sm->p_log, p_smp, FILE_ID, OSM_LOG_VERBOSE);
681 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
682 "Discovered new %s node,"
683 "\n\t\t\t\tGUID 0x%" PRIx64 ", TID 0x%" PRIx64 "\n",
684 ib_get_node_type_str(p_ni->node_type),
685 cl_ntoh64(p_ni->node_guid), cl_ntoh64(p_smp->trans_id));
687 if (PF(port_num > p_ni->num_ports)) {
688 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0A: "
689 "New %s node GUID 0x%" PRIx64 "is non-compliant and "
690 "is being ignored since the "
691 "local port num %u > num ports %u\n",
692 ib_get_node_type_str(p_ni->node_type),
693 cl_ntoh64(p_ni->node_guid), port_num,
698 p_node = osm_node_new(p_madw);
699 if (PF(p_node == NULL)) {
700 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D07: "
701 "Unable to create new node object\n");
706 Create a new port object to represent this node's physical
707 ports in the port table.
709 p_port = osm_port_new(p_ni, p_node);
710 if (PF(p_port == NULL)) {
711 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D14: "
712 "Unable to create new port object\n");
713 osm_node_delete(&p_node);
718 Add the new port object to the database.
721 (osm_port_t *) cl_qmap_insert(&sm->p_subn->port_guid_tbl,
722 p_ni->port_guid, &p_port->map_item);
723 if (PF(p_port_check != p_port)) {
725 We should never be here!
726 Somehow, this port GUID already exists in the table.
728 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D15: "
729 "Duplicate Port GUID 0x%" PRIx64
730 "! Found by the two directed routes:\n",
731 cl_ntoh64(p_ni->port_guid));
732 osm_dump_dr_path_v2(sm->p_log,
733 osm_physp_get_dr_path_ptr(p_port->p_physp),
734 FILE_ID, OSM_LOG_ERROR);
735 osm_dump_dr_path_v2(sm->p_log,
736 osm_physp_get_dr_path_ptr(p_port_check->
738 FILE_ID, OSM_LOG_ERROR);
739 osm_port_delete(&p_port);
740 osm_node_delete(&p_node);
744 p_alias_guid = osm_alias_guid_new(p_ni->port_guid,
746 if (PF(!p_alias_guid)) {
747 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D18: "
748 "alias guid memory allocation failed"
749 " for port GUID 0x%" PRIx64 "\n",
750 cl_ntoh64(p_ni->port_guid));
754 /* insert into alias guid table */
756 (osm_alias_guid_t *) cl_qmap_insert(&sm->p_subn->alias_port_guid_tbl,
757 p_alias_guid->alias_guid,
758 &p_alias_guid->map_item);
759 if (p_alias_guid_check != p_alias_guid) {
760 /* alias GUID is a duplicate */
761 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D19: "
762 "Duplicate alias port GUID 0x%" PRIx64 "\n",
763 cl_ntoh64(p_ni->port_guid));
764 osm_alias_guid_delete(&p_alias_guid);
768 /* If we are a master, then this means the port is new on the subnet.
769 Mark it as new - need to send trap 64 on these ports.
770 The condition that we are master is true, since if we are in discovering
771 state (meaning we woke up from standby or we are just initializing),
772 then these ports may be new to us, but are not new on the subnet.
773 If we are master, then the subnet as we know it is the updated one,
774 and any new ports we encounter should cause trap 64. C14-72.1.1 */
775 if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER)
778 /* If there were RouterInfo or other router attribute,
779 this would be elsewhere */
780 if (p_ni->node_type == IB_NODE_TYPE_ROUTER) {
781 if (PF((p_rtr = osm_router_new(p_port)) == NULL))
782 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1A: "
783 "Unable to create new router object\n");
785 p_rtr_guid_tbl = &sm->p_subn->rtr_guid_tbl;
787 (osm_router_t *) cl_qmap_insert(p_rtr_guid_tbl,
790 if (PF(p_rtr_check != p_rtr))
791 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1B: "
792 "Unable to add port GUID:0x%016" PRIx64
793 " to router table\n",
794 cl_ntoh64(p_ni->port_guid));
799 (osm_node_t *) cl_qmap_insert(&sm->p_subn->node_guid_tbl,
800 p_ni->node_guid, &p_node->map_item);
801 if (PF(p_node_check != p_node)) {
803 This node must have been inserted by another thread.
804 This is unexpected, but is not an error.
805 We can simply clean-up, since the other thread will
806 see this processing through to completion.
808 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
809 "Discovery race detected at node 0x%" PRIx64 "\n",
810 cl_ntoh64(p_ni->node_guid));
811 osm_node_delete(&p_node);
812 p_node = p_node_check;
813 ni_rcv_set_links(sm, p_node, port_num, p_ni_context);
816 ni_rcv_set_links(sm, p_node, port_num, p_ni_context);
818 p_node->discovery_count++;
819 ni_rcv_get_node_desc(sm, p_node, p_madw);
821 switch (p_ni->node_type) {
822 case IB_NODE_TYPE_CA:
823 case IB_NODE_TYPE_ROUTER:
824 ni_rcv_process_new_ca_or_router(sm, p_node, p_madw);
826 case IB_NODE_TYPE_SWITCH:
827 ni_rcv_process_new_switch(sm, p_node, p_madw);
830 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D16: "
831 "Unknown node type %u with GUID 0x%" PRIx64 "\n",
832 p_ni->node_type, cl_ntoh64(p_ni->node_guid));
837 OSM_LOG_EXIT(sm->p_log);
840 /**********************************************************************
841 The plock must be held before calling this function.
842 **********************************************************************/
/*
 * Handle NodeInfo received from a node that is already in the subnet.
 *
 * Steps visible below:
 *  - Log the rediscovery together with the current discovery_count.
 *  - Reject the response when the local port number exceeds num_ports
 *    (ERR 0D0C).
 *  - Increment discovery_count and continue according to the node type:
 *    CA and router nodes go to ni_rcv_process_existing_ca_or_router(),
 *    switches to ni_rcv_process_existing_switch(); an unknown type is
 *    logged as ERR 0D09.
 *  - Log a changed SysImageGUID at debug level.
 *  - Set the links for the reporting port and overwrite the stored
 *    node_info with the received NodeInfo.
 *
 * NOTE(review): declarations, the early exit after ERR 0D0C, the break and
 * default lines of the switch and the closing braces are not visible in
 * this listing, so the exact scope of the final statements is unconfirmed.
 * NOTE(review): in the ERR 0D0C message there is no space between the GUID
 * conversion and the word is, so the two run together in the log output.
 */
843 static void ni_rcv_process_existing(IN osm_sm_t * sm, IN osm_node_t * p_node,
844 IN const osm_madw_t * p_madw)
846 ib_node_info_t *p_ni;
848 osm_ni_context_t *p_ni_context;
851 OSM_LOG_ENTER(sm->p_log);
853 p_smp = osm_madw_get_smp_ptr(p_madw);
854 p_ni = ib_smp_get_payload_ptr(p_smp);
855 p_ni_context = osm_madw_get_ni_context_ptr(p_madw);
856 port_num = ib_node_info_get_local_port_num(p_ni);
858 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
859 "Rediscovered %s node 0x%" PRIx64 " TID 0x%" PRIx64
860 ", discovered %u times already\n",
861 ib_get_node_type_str(p_ni->node_type),
862 cl_ntoh64(p_ni->node_guid),
863 cl_ntoh64(p_smp->trans_id), p_node->discovery_count);
865 if (PF(port_num > p_ni->num_ports)) {
866 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0C: "
867 "Existing %s node GUID 0x%" PRIx64 "is non-compliant "
868 "and is being ignored since the "
869 "local port num %u > num ports %u\n",
870 ib_get_node_type_str(p_ni->node_type),
871 cl_ntoh64(p_ni->node_guid), port_num,
877 If we haven't already encountered this existing node
878 on this particular sweep, then process further.
880 p_node->discovery_count++;
882 switch (p_ni->node_type) {
883 case IB_NODE_TYPE_CA:
884 case IB_NODE_TYPE_ROUTER:
885 ni_rcv_process_existing_ca_or_router(sm, p_node, p_madw);
888 case IB_NODE_TYPE_SWITCH:
889 ni_rcv_process_existing_switch(sm, p_node, p_madw);
893 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D09: "
894 "Unknown node type %u with GUID 0x%" PRIx64 "\n",
895 p_ni->node_type, cl_ntoh64(p_ni->node_guid));
899 if ( p_ni->sys_guid != p_node->node_info.sys_guid) {
900 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Updated SysImageGUID: 0x%"
901 PRIx64 " for node 0x%" PRIx64 "\n",
902 cl_ntoh64(p_ni->sys_guid),
903 cl_ntoh64(p_ni->node_guid));
905 ni_rcv_set_links(sm, p_node, port_num, p_ni_context);
906 p_node->node_info = *p_ni;
909 OSM_LOG_EXIT(sm->p_log);
/*
 * Entry point for a received NodeInfo MAD.
 *
 * context - the osm_sm_t object.
 * data    - the osm_madw_t wrapping the received MAD.
 *
 * The response is checked for a zero node GUID (ERR 0D16), a zero port GUID
 * (ERR 0D17) and a non-zero MAD status (debug log). sm->p_lock is then
 * acquired exclusively, the node GUID is looked up in the subnet, the
 * NodeInfo is dumped at debug level, and the MAD is passed to
 * ni_rcv_process_new() or ni_rcv_process_existing() before the lock is
 * released.
 *
 * NOTE(review): the statements that leave the function after the three
 * checks, and the if/else selecting between the two handlers, are not
 * visible in this listing.
 * NOTE(review): the comment text below mentions the shared lock, but the
 * code takes the lock with CL_PLOCK_EXCL_ACQUIRE.
 * NOTE(review): ERR 0D16 is also logged by ni_rcv_process_new() for an
 * unknown node type -- confirm whether the codes are meant to be distinct.
 */
912 void osm_ni_rcv_process(IN void *context, IN void *data)
914 osm_sm_t *sm = context;
915 osm_madw_t *p_madw = data;
916 ib_node_info_t *p_ni;
922 OSM_LOG_ENTER(sm->p_log);
926 p_smp = osm_madw_get_smp_ptr(p_madw);
927 p_ni = ib_smp_get_payload_ptr(p_smp);
929 CL_ASSERT(p_smp->attr_id == IB_MAD_ATTR_NODE_INFO);
931 if (PF(p_ni->node_guid == 0)) {
932 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D16: "
933 "Got Zero Node GUID! Found on the directed route:\n");
934 osm_dump_smp_dr_path_v2(sm->p_log, p_smp, FILE_ID, OSM_LOG_ERROR);
938 if (PF(p_ni->port_guid == 0)) {
939 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D17: "
940 "Got Zero Port GUID! Found on the directed route:\n");
941 osm_dump_smp_dr_path_v2(sm->p_log, p_smp, FILE_ID, OSM_LOG_ERROR);
945 if (ib_smp_get_status(p_smp)) {
946 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
947 "MAD status 0x%x received\n",
948 cl_ntoh16(ib_smp_get_status(p_smp)));
953 Determine if this node has already been discovered,
954 and process accordingly.
955 During processing of this node, hold the shared lock.
958 CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
959 p_node = osm_get_node_by_guid(sm->p_subn, p_ni->node_guid);
961 osm_dump_node_info_v2(sm->p_log, p_ni, FILE_ID, OSM_LOG_DEBUG);
964 ni_rcv_process_new(sm, p_madw);
966 ni_rcv_process_existing(sm, p_node, p_madw);
968 CL_PLOCK_RELEASE(sm->p_lock);
971 OSM_LOG_EXIT(sm->p_log);