/*
 * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved.
 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

/*
 * Abstract:
 *    Implementation of osm_state_mgr_t.
 * This file implements the State Manager object.
 */
#if HAVE_CONFIG_H
#  include <config.h>
#endif				/* HAVE_CONFIG_H */

#include <stdlib.h>
#include <string.h>
#include <iba/ib_types.h>
#include <complib/cl_passivelock.h>
#include <complib/cl_debug.h>
#include <complib/cl_qmap.h>
#include <opensm/osm_sm.h>
#include <opensm/osm_madw.h>
#include <opensm/osm_switch.h>
#include <opensm/osm_log.h>
#include <opensm/osm_subnet.h>
#include <opensm/osm_helper.h>
#include <opensm/osm_msgdef.h>
#include <opensm/osm_node.h>
#include <opensm/osm_port.h>
#include <vendor/osm_vendor_api.h>
#include <opensm/osm_inform.h>
#include <opensm/osm_opensm.h>
66 extern void osm_drop_mgr_process(IN osm_sm_t * sm);
67 extern osm_signal_t osm_qos_setup(IN osm_opensm_t * p_osm);
68 extern osm_signal_t osm_pkey_mgr_process(IN osm_opensm_t * p_osm);
69 extern osm_signal_t osm_mcast_mgr_process(IN osm_sm_t * sm);
70 extern osm_signal_t osm_mcast_mgr_process_mgroups(IN osm_sm_t * sm);
71 extern osm_signal_t osm_link_mgr_process(IN osm_sm_t * sm, IN uint8_t state);
73 /**********************************************************************
74 **********************************************************************/
75 static void __osm_state_mgr_up_msg(IN const osm_sm_t * sm)
78 * This message should be written only once - when the
79 * SM moves to Master state and the subnet is up for
82 osm_log(sm->p_log, sm->p_subn->first_time_master_sweep ?
83 OSM_LOG_SYS : OSM_LOG_INFO, "SUBNET UP\n");
85 OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
86 sm->p_subn->opt.sweep_interval ?
87 "SUBNET UP" : "SUBNET UP (sweep disabled)");
90 /**********************************************************************
91 **********************************************************************/
92 static void __osm_state_mgr_reset_node_count(IN cl_map_item_t *
93 const p_map_item, IN void *context)
95 osm_node_t *p_node = (osm_node_t *) p_map_item;
97 p_node->discovery_count = 0;
100 /**********************************************************************
101 **********************************************************************/
102 static void __osm_state_mgr_reset_port_count(IN cl_map_item_t *
103 const p_map_item, IN void *context)
105 osm_port_t *p_port = (osm_port_t *) p_map_item;
107 p_port->discovery_count = 0;
110 /**********************************************************************
111 **********************************************************************/
113 __osm_state_mgr_reset_switch_count(IN cl_map_item_t * const p_map_item,
116 osm_switch_t *p_sw = (osm_switch_t *) p_map_item;
118 p_sw->discovery_count = 0;
119 p_sw->need_update = 1;
122 /**********************************************************************
123 **********************************************************************/
124 static void __osm_state_mgr_get_sw_info(IN cl_map_item_t * const p_object,
128 osm_dr_path_t *p_dr_path;
129 osm_madw_context_t mad_context;
130 osm_switch_t *const p_sw = (osm_switch_t *) p_object;
131 osm_sm_t *sm = context;
132 ib_api_status_t status;
134 OSM_LOG_ENTER(sm->p_log);
136 p_node = p_sw->p_node;
137 p_dr_path = osm_physp_get_dr_path_ptr(osm_node_get_physp_ptr(p_node, 0));
139 memset(&mad_context, 0, sizeof(mad_context));
141 mad_context.si_context.node_guid = osm_node_get_node_guid(p_node);
142 mad_context.si_context.set_method = FALSE;
143 mad_context.si_context.light_sweep = TRUE;
145 status = osm_req_get(sm, p_dr_path, IB_MAD_ATTR_SWITCH_INFO, 0,
146 OSM_MSG_LIGHT_SWEEP_FAIL, &mad_context);
148 if (status != IB_SUCCESS)
149 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3304: "
150 "Request for SwitchInfo failed\n");
152 OSM_LOG_EXIT(sm->p_log);
155 /**********************************************************************
156 Initiate a remote port info request for the given physical port
157 **********************************************************************/
159 __osm_state_mgr_get_remote_port_info(IN osm_sm_t * sm,
160 IN osm_physp_t * const p_physp)
162 osm_dr_path_t *p_dr_path;
163 osm_dr_path_t rem_node_dr_path;
164 osm_madw_context_t mad_context;
165 ib_api_status_t status;
167 OSM_LOG_ENTER(sm->p_log);
169 /* generate a dr path leaving on the physp to the remote node */
170 p_dr_path = osm_physp_get_dr_path_ptr(p_physp);
171 memcpy(&rem_node_dr_path, p_dr_path, sizeof(osm_dr_path_t));
172 osm_dr_path_extend(&rem_node_dr_path, osm_physp_get_port_num(p_physp));
174 memset(&mad_context, 0, sizeof(mad_context));
176 mad_context.pi_context.node_guid =
177 osm_node_get_node_guid(osm_physp_get_node_ptr(p_physp));
178 mad_context.pi_context.port_guid = p_physp->port_guid;
179 mad_context.pi_context.set_method = FALSE;
180 mad_context.pi_context.light_sweep = TRUE;
181 mad_context.pi_context.active_transition = FALSE;
183 /* note that with some negative logic - if the query failed it means that
184 * there is no point in going to heavy sweep */
185 status = osm_req_get(sm, &rem_node_dr_path,
186 IB_MAD_ATTR_PORT_INFO, 0, CL_DISP_MSGID_NONE,
189 if (status != IB_SUCCESS)
190 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 332E: "
191 "Request for PortInfo failed\n");
193 OSM_LOG_EXIT(sm->p_log);
196 /**********************************************************************
197 Initiates a thorough sweep of the subnet.
198 Used when there is suspicion that something on the subnet has changed.
199 **********************************************************************/
200 static ib_api_status_t __osm_state_mgr_sweep_hop_0(IN osm_sm_t * sm)
202 ib_api_status_t status;
203 osm_dr_path_t dr_path;
204 osm_bind_handle_t h_bind;
205 uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX];
207 OSM_LOG_ENTER(sm->p_log);
209 memset(path_array, 0, sizeof(path_array));
212 * First, get the bind handle.
214 h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl);
215 if (h_bind != OSM_BIND_INVALID_HANDLE) {
216 OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
217 "INITIATING HEAVY SWEEP");
219 * Start the sweep by clearing the port counts, then
220 * get our own NodeInfo at 0 hops.
222 CL_PLOCK_ACQUIRE(sm->p_lock);
224 cl_qmap_apply_func(&sm->p_subn->node_guid_tbl,
225 __osm_state_mgr_reset_node_count, sm);
227 cl_qmap_apply_func(&sm->p_subn->port_guid_tbl,
228 __osm_state_mgr_reset_port_count, sm);
230 cl_qmap_apply_func(&sm->p_subn->sw_guid_tbl,
231 __osm_state_mgr_reset_switch_count, sm);
233 /* Set the in_sweep_hop_0 flag in subn to be TRUE.
234 * This will indicate the sweeping not to continue beyond the
236 * This is relevant for the case of SM on switch, since in the
237 * switch info we need to signal somehow not to continue
239 sm->p_subn->in_sweep_hop_0 = TRUE;
241 CL_PLOCK_RELEASE(sm->p_lock);
243 osm_dr_path_init(&dr_path, h_bind, 0, path_array);
244 status = osm_req_get(sm, &dr_path, IB_MAD_ATTR_NODE_INFO, 0,
245 CL_DISP_MSGID_NONE, NULL);
247 if (status != IB_SUCCESS)
248 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3305: "
249 "Request for NodeInfo failed\n");
251 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
252 "No bound ports. Deferring sweep...\n");
253 status = IB_INVALID_STATE;
256 OSM_LOG_EXIT(sm->p_log);
260 /**********************************************************************
261 Clear out all existing port lid assignments
262 **********************************************************************/
263 static ib_api_status_t __osm_state_mgr_clean_known_lids(IN osm_sm_t * sm)
265 ib_api_status_t status = IB_SUCCESS;
266 cl_ptr_vector_t *p_vec = &(sm->p_subn->port_lid_tbl);
269 OSM_LOG_ENTER(sm->p_log);
271 /* we need a lock here! */
272 CL_PLOCK_ACQUIRE(sm->p_lock);
274 for (i = 0; i < cl_ptr_vector_get_size(p_vec); i++)
275 cl_ptr_vector_set(p_vec, i, NULL);
277 CL_PLOCK_RELEASE(sm->p_lock);
279 OSM_LOG_EXIT(sm->p_log);
283 /**********************************************************************
284 Notifies the transport layer that the local LID has changed,
285 which give it a chance to update address vectors, etc..
286 **********************************************************************/
287 static ib_api_status_t __osm_state_mgr_notify_lid_change(IN osm_sm_t * sm)
289 ib_api_status_t status;
290 osm_bind_handle_t h_bind;
292 OSM_LOG_ENTER(sm->p_log);
295 * First, get the bind handle.
297 h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl);
298 if (h_bind == OSM_BIND_INVALID_HANDLE) {
299 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3306: "
306 * Notify the transport layer that we changed the local LID.
308 status = osm_vendor_local_lid_change(h_bind);
309 if (status != IB_SUCCESS)
310 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3307: "
311 "Vendor LID update failed (%s)\n",
312 ib_get_err_str(status));
315 OSM_LOG_EXIT(sm->p_log);
319 /**********************************************************************
320 Returns true if the SM port is down.
321 The SM's port object must exist in the port_guid table.
322 **********************************************************************/
323 static boolean_t __osm_state_mgr_is_sm_port_down(IN osm_sm_t * sm)
325 ib_net64_t port_guid;
327 osm_physp_t *p_physp;
330 OSM_LOG_ENTER(sm->p_log);
332 port_guid = sm->p_subn->sm_port_guid;
335 * If we don't know our own port guid yet, assume the port is down.
337 if (port_guid == 0) {
338 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3308: "
339 "SM port GUID unknown\n");
340 state = IB_LINK_DOWN;
344 CL_ASSERT(port_guid);
346 CL_PLOCK_ACQUIRE(sm->p_lock);
347 p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
349 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3309: "
350 "SM port with GUID:%016" PRIx64 " is unknown\n",
351 cl_ntoh64(port_guid));
352 state = IB_LINK_DOWN;
353 CL_PLOCK_RELEASE(sm->p_lock);
357 p_physp = p_port->p_physp;
361 state = osm_physp_get_port_state(p_physp);
362 CL_PLOCK_RELEASE(sm->p_lock);
365 OSM_LOG_EXIT(sm->p_log);
366 return (state == IB_LINK_DOWN);
369 /**********************************************************************
370 Sweeps the node 1 hop away.
371 This sets off a "chain reaction" that causes discovery of the subnet.
372 Used when there is suspicion that something on the subnet has changed.
373 **********************************************************************/
374 static ib_api_status_t __osm_state_mgr_sweep_hop_1(IN osm_sm_t * sm)
376 ib_api_status_t status = IB_SUCCESS;
377 osm_bind_handle_t h_bind;
378 osm_madw_context_t context;
381 osm_physp_t *p_physp;
382 osm_dr_path_t *p_dr_path;
383 osm_dr_path_t hop_1_path;
384 ib_net64_t port_guid;
386 uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX];
388 osm_physp_t *p_ext_physp;
390 OSM_LOG_ENTER(sm->p_log);
393 * First, get our own port and node objects.
395 port_guid = sm->p_subn->sm_port_guid;
397 CL_ASSERT(port_guid);
399 /* Set the in_sweep_hop_0 flag in subn to be FALSE.
400 * This will indicate the sweeping to continue beyond the
402 * This is relevant for the case of SM on switch, since in the
403 * switch info we need to signal that the sweeping should
404 * continue through the switch. */
405 sm->p_subn->in_sweep_hop_0 = FALSE;
407 p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
409 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3310: "
410 "No SM port object\n");
415 p_node = p_port->p_node;
418 port_num = ib_node_info_get_local_port_num(&p_node->node_info);
420 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
421 "Probing hop 1 on local port %u\n", port_num);
423 p_physp = osm_node_get_physp_ptr(p_node, port_num);
427 p_dr_path = osm_physp_get_dr_path_ptr(p_physp);
428 h_bind = osm_dr_path_get_bind_handle(p_dr_path);
430 CL_ASSERT(h_bind != OSM_BIND_INVALID_HANDLE);
432 memset(path_array, 0, sizeof(path_array));
433 /* the hop_1 operations depend on the type of our node.
434 * Currently - legal nodes that can host SM are SW and CA */
435 switch (osm_node_get_type(p_node)) {
436 case IB_NODE_TYPE_CA:
437 case IB_NODE_TYPE_ROUTER:
438 memset(&context, 0, sizeof(context));
439 context.ni_context.node_guid = osm_node_get_node_guid(p_node);
440 context.ni_context.port_num = port_num;
442 path_array[1] = port_num;
444 osm_dr_path_init(&hop_1_path, h_bind, 1, path_array);
445 status = osm_req_get(sm, &hop_1_path, IB_MAD_ATTR_NODE_INFO, 0,
446 CL_DISP_MSGID_NONE, &context);
447 if (status != IB_SUCCESS)
448 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3311: "
449 "Request for NodeInfo failed\n");
452 case IB_NODE_TYPE_SWITCH:
453 /* Need to go over all the ports of the switch, and send a node_info
454 * from them. This doesn't include the port 0 of the switch, which
456 * Note: We'll send another switchInfo on port 0, since if no ports
457 * are connected, we still want to get some response, and have the
460 num_ports = osm_node_get_num_physp(p_node);
461 for (port_num = 0; port_num < num_ports; port_num++) {
462 /* go through the port only if the port is not DOWN */
463 p_ext_physp = osm_node_get_physp_ptr(p_node, port_num);
464 if (p_ext_physp && ib_port_info_get_port_state
465 (&(p_ext_physp->port_info)) > IB_LINK_DOWN) {
466 memset(&context, 0, sizeof(context));
467 context.ni_context.node_guid =
468 osm_node_get_node_guid(p_node);
469 context.ni_context.port_num = port_num;
471 path_array[1] = port_num;
472 osm_dr_path_init(&hop_1_path, h_bind, 1,
474 status = osm_req_get(sm, &hop_1_path,
475 IB_MAD_ATTR_NODE_INFO, 0,
479 if (status != IB_SUCCESS)
480 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3312: "
481 "Request for NodeInfo failed\n");
487 OSM_LOG(sm->p_log, OSM_LOG_ERROR,
488 "ERR 3313: Unknown node type %d (%s)\n",
489 osm_node_get_type(p_node), p_node->print_desc);
493 OSM_LOG_EXIT(sm->p_log);
497 static void query_sm_info(cl_map_item_t *item, void *cxt)
499 osm_madw_context_t context;
500 osm_remote_sm_t *r_sm = cl_item_obj(item, r_sm, map_item);
504 context.smi_context.port_guid = r_sm->p_port->guid;
505 context.smi_context.set_method = FALSE;
506 context.smi_context.light_sweep = TRUE;
508 ret = osm_req_get(sm, osm_physp_get_dr_path_ptr(r_sm->p_port->p_physp),
509 IB_MAD_ATTR_SM_INFO, 0, CL_DISP_MSGID_NONE, &context);
510 if (ret != IB_SUCCESS)
511 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3314: "
512 "Failure requesting SMInfo (%s)\n",
513 ib_get_err_str(ret));
516 /**********************************************************************
517 During a light sweep check each node to see if the node descriptor is valid
518 if not issue a ND query.
519 **********************************************************************/
520 static void __osm_state_mgr_get_node_desc(IN cl_map_item_t * const p_object,
523 osm_madw_context_t mad_context;
524 osm_node_t *const p_node = (osm_node_t *) p_object;
525 osm_sm_t *sm = context;
526 osm_physp_t *p_physp = NULL;
527 unsigned i, num_ports;
528 ib_api_status_t status;
530 OSM_LOG_ENTER(sm->p_log);
534 if (p_node->print_desc && strcmp(p_node->print_desc, OSM_NODE_DESC_UNKNOWN))
535 /* if ND is valid, do nothing */
538 OSM_LOG(sm->p_log, OSM_LOG_ERROR,
539 "ERR 3319: Unknown node description for node GUID "
540 "0x%016" PRIx64 ". Reissuing ND query\n",
541 cl_ntoh64(osm_node_get_node_guid (p_node)));
543 /* get a physp to request from. */
544 num_ports = osm_node_get_num_physp(p_node);
545 for (i = 0; i < num_ports; i++)
546 if ((p_physp = osm_node_get_physp_ptr(p_node, i)))
550 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 331C: "
551 "Failed to find any valid physical port object.\n");
555 mad_context.nd_context.node_guid = osm_node_get_node_guid(p_node);
557 status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp),
558 IB_MAD_ATTR_NODE_DESC, 0, CL_DISP_MSGID_NONE,
560 if (status != IB_SUCCESS)
561 OSM_LOG(sm->p_log, OSM_LOG_ERROR,
562 "ERR 331B: Failure initiating NodeDescription request "
563 "(%s)\n", ib_get_err_str(status));
566 OSM_LOG_EXIT(sm->p_log);
569 /**********************************************************************
570 Initiates a lightweight sweep of the subnet.
571 Used during normal sweeps after the subnet is up.
572 **********************************************************************/
573 static ib_api_status_t __osm_state_mgr_light_sweep_start(IN osm_sm_t * sm)
575 ib_api_status_t status = IB_SUCCESS;
576 osm_bind_handle_t h_bind;
578 cl_map_item_t *p_next;
580 osm_physp_t *p_physp;
583 OSM_LOG_ENTER(sm->p_log);
585 p_sw_tbl = &sm->p_subn->sw_guid_tbl;
588 * First, get the bind handle.
590 h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl);
591 if (h_bind == OSM_BIND_INVALID_HANDLE) {
592 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
593 "No bound ports. Deferring sweep...\n");
594 status = IB_INVALID_STATE;
598 OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, "INITIATING LIGHT SWEEP");
599 CL_PLOCK_ACQUIRE(sm->p_lock);
600 cl_qmap_apply_func(p_sw_tbl, __osm_state_mgr_get_sw_info, sm);
601 CL_PLOCK_RELEASE(sm->p_lock);
603 CL_PLOCK_ACQUIRE(sm->p_lock);
604 cl_qmap_apply_func(&sm->p_subn->node_guid_tbl, __osm_state_mgr_get_node_desc, sm);
605 CL_PLOCK_RELEASE(sm->p_lock);
607 /* now scan the list of physical ports that were not down but have no remote port */
608 CL_PLOCK_ACQUIRE(sm->p_lock);
609 p_next = cl_qmap_head(&sm->p_subn->node_guid_tbl);
610 while (p_next != cl_qmap_end(&sm->p_subn->node_guid_tbl)) {
611 p_node = (osm_node_t *) p_next;
612 p_next = cl_qmap_next(p_next);
614 for (port_num = 1; port_num < osm_node_get_num_physp(p_node);
616 p_physp = osm_node_get_physp_ptr(p_node, port_num);
617 if (p_physp && (osm_physp_get_port_state(p_physp) !=
619 && !osm_physp_get_remote(p_physp)) {
620 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3315: "
621 "Unknown remote side for node 0x%016"
623 "(%s) port %u. Adding to light sweep sampling list\n",
624 cl_ntoh64(osm_node_get_node_guid
626 p_node->print_desc, port_num);
628 osm_dump_dr_path(sm->p_log,
629 osm_physp_get_dr_path_ptr
630 (p_physp), OSM_LOG_ERROR);
632 __osm_state_mgr_get_remote_port_info(sm,
638 cl_qmap_apply_func(&sm->p_subn->sm_guid_tbl, query_sm_info, sm);
640 CL_PLOCK_RELEASE(sm->p_lock);
643 OSM_LOG_EXIT(sm->p_log);
647 /**********************************************************************
648 * Go over all the remote SMs (as updated in the sm_guid_tbl).
649 * Find if there is a remote sm that is a master SM.
650 * If there is a remote master SM - return a pointer to it,
651 * else - return NULL.
652 **********************************************************************/
653 static osm_remote_sm_t *__osm_state_mgr_exists_other_master_sm(IN osm_sm_t * sm)
656 osm_remote_sm_t *p_sm;
657 osm_remote_sm_t *p_sm_res = NULL;
659 OSM_LOG_ENTER(sm->p_log);
661 p_sm_tbl = &sm->p_subn->sm_guid_tbl;
663 /* go over all the remote SMs */
664 for (p_sm = (osm_remote_sm_t *) cl_qmap_head(p_sm_tbl);
665 p_sm != (osm_remote_sm_t *) cl_qmap_end(p_sm_tbl);
666 p_sm = (osm_remote_sm_t *) cl_qmap_next(&p_sm->map_item)) {
667 /* If the sm is in MASTER state - return a pointer to it */
668 if (ib_sminfo_get_state(&p_sm->smi) == IB_SMINFO_STATE_MASTER) {
669 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
670 "Found remote master SM with guid:0x%016" PRIx64
671 " (node %s)\n", cl_ntoh64(p_sm->smi.guid),
672 p_sm->p_port->p_node ? p_sm->p_port->p_node->
673 print_desc : "UNKNOWN");
680 OSM_LOG_EXIT(sm->p_log);
684 /**********************************************************************
685 * Go over all remote SMs (as updated in the sm_guid_tbl).
686 * Find the one with the highest priority and lowest guid.
687 * Compare this SM to the local SM. If the local SM is higher -
688 * return NULL, if the remote SM is higher - return a pointer to it.
689 **********************************************************************/
690 static osm_remote_sm_t *__osm_state_mgr_get_highest_sm(IN osm_sm_t * sm)
693 osm_remote_sm_t *p_sm = NULL;
694 osm_remote_sm_t *p_highest_sm;
695 uint8_t highest_sm_priority;
696 ib_net64_t highest_sm_guid;
698 OSM_LOG_ENTER(sm->p_log);
700 p_sm_tbl = &sm->p_subn->sm_guid_tbl;
702 /* Start with the local sm as the standard */
704 highest_sm_priority = sm->p_subn->opt.sm_priority;
705 highest_sm_guid = sm->p_subn->sm_port_guid;
707 /* go over all the remote SMs */
708 for (p_sm = (osm_remote_sm_t *) cl_qmap_head(p_sm_tbl);
709 p_sm != (osm_remote_sm_t *) cl_qmap_end(p_sm_tbl);
710 p_sm = (osm_remote_sm_t *) cl_qmap_next(&p_sm->map_item)) {
712 /* If the sm is in NOTACTIVE state - continue */
713 if (ib_sminfo_get_state(&p_sm->smi) ==
714 IB_SMINFO_STATE_NOTACTIVE)
717 if (osm_sm_is_greater_than(ib_sminfo_get_priority(&p_sm->smi),
718 p_sm->smi.guid, highest_sm_priority,
720 /* the new p_sm is with higher priority - update the highest_sm */
723 highest_sm_priority =
724 ib_sminfo_get_priority(&p_sm->smi);
725 highest_sm_guid = p_sm->smi.guid;
729 if (p_highest_sm != NULL)
730 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
731 "Found higher SM with guid: %016" PRIx64 " (node %s)\n",
732 cl_ntoh64(p_highest_sm->smi.guid),
733 p_highest_sm->p_port->p_node ?
734 p_highest_sm->p_port->p_node->print_desc : "UNKNOWN");
736 OSM_LOG_EXIT(sm->p_log);
737 return (p_highest_sm);
740 /**********************************************************************
741 * Send SubnSet(SMInfo) SMP with HANDOVER attribute to the
742 * remote_sm indicated.
743 **********************************************************************/
745 __osm_state_mgr_send_handover(IN osm_sm_t * const sm,
746 IN osm_remote_sm_t * const p_sm)
748 uint8_t payload[IB_SMP_DATA_SIZE];
749 ib_sm_info_t *p_smi = (ib_sm_info_t *) payload;
750 osm_madw_context_t context;
751 const osm_port_t *p_port;
752 ib_api_status_t status;
754 OSM_LOG_ENTER(sm->p_log);
757 * Send a query of SubnSet(SMInfo) HANDOVER to the remote sm given.
760 memset(&context, 0, sizeof(context));
761 p_port = p_sm->p_port;
762 if (p_port == NULL) {
763 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3316: "
764 "No port object on given remote_sm object\n");
768 /* update the master_guid in the sm_state_mgr object according to */
769 /* the guid of the port where the new Master SM should reside. */
770 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
771 "Handing over mastership. Updating sm_state_mgr master_guid: %016"
772 PRIx64 " (node %s)\n", cl_ntoh64(p_port->guid),
773 p_port->p_node ? p_port->p_node->print_desc : "UNKNOWN");
774 sm->master_sm_guid = p_port->guid;
776 context.smi_context.port_guid = p_port->guid;
777 context.smi_context.set_method = TRUE;
779 p_smi->guid = sm->p_subn->sm_port_guid;
780 p_smi->act_count = cl_hton32(sm->p_subn->p_osm->stats.qp0_mads_sent);
781 p_smi->pri_state = (uint8_t) (sm->p_subn->sm_state |
782 sm->p_subn->opt.sm_priority << 4);
784 * Return 0 for the SM key unless we authenticate the requester
787 if (ib_sminfo_get_state(&p_sm->smi) == IB_SMINFO_STATE_MASTER) {
788 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
789 "Responding to master SM with real sm_key\n");
790 p_smi->sm_key = sm->p_subn->opt.sm_key;
792 /* The requester is not authenticated as master - set sm_key to zero */
793 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
794 "Responding to SM not master with zero sm_key\n");
798 status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p_port->p_physp),
799 payload, sizeof(payload), IB_MAD_ATTR_SM_INFO,
800 IB_SMINFO_ATTR_MOD_HANDOVER, CL_DISP_MSGID_NONE,
803 if (status != IB_SUCCESS)
804 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3317: "
805 "Failure requesting SMInfo (%s)\n",
806 ib_get_err_str(status));
809 OSM_LOG_EXIT(sm->p_log);
812 /**********************************************************************
813 * Send Trap 64 on all new ports.
814 **********************************************************************/
815 static void __osm_state_mgr_report_new_ports(IN osm_sm_t * sm)
818 ib_mad_notice_attr_t notice;
819 ib_api_status_t status;
820 ib_net64_t port_guid;
821 cl_map_item_t *p_next;
826 OSM_LOG_ENTER(sm->p_log);
828 CL_PLOCK_ACQUIRE(sm->p_lock);
829 p_next = cl_qmap_head(&sm->p_subn->port_guid_tbl);
830 while (p_next != cl_qmap_end(&sm->p_subn->port_guid_tbl)) {
831 p_port = (osm_port_t *) p_next;
832 p_next = cl_qmap_next(p_next);
837 port_guid = osm_port_get_guid(p_port);
838 /* issue a notice - trap 64 */
840 /* details of the notice */
841 notice.generic_type = 0x83; /* is generic subn mgt type */
842 ib_notice_set_prod_type_ho(¬ice, 4); /* A Class Manager generator */
843 /* endport becomes to be reachable */
844 notice.g_or_v.generic.trap_num = CL_HTON16(64);
845 /* The sm_base_lid is saved in network order already. */
846 notice.issuer_lid = sm->p_subn->sm_base_lid;
847 /* following C14-72.1.1 and table 119 p739 */
848 /* we need to provide the GID */
849 port_gid.unicast.prefix = sm->p_subn->opt.subnet_prefix;
850 port_gid.unicast.interface_id = port_guid;
851 memcpy(&(notice.data_details.ntc_64_67.gid), &(port_gid),
854 /* According to page 653 - the issuer gid in this case of trap
855 * is the SM gid, since the SM is the initiator of this trap. */
856 notice.issuer_gid.unicast.prefix =
857 sm->p_subn->opt.subnet_prefix;
858 notice.issuer_gid.unicast.interface_id =
859 sm->p_subn->sm_port_guid;
861 status = osm_report_notice(sm->p_log, sm->p_subn, ¬ice);
862 if (status != IB_SUCCESS)
863 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3318: "
864 "Error sending trap reports on GUID:0x%016"
865 PRIx64 " (%s)\n", port_gid.unicast.interface_id,
866 ib_get_err_str(status));
867 osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho);
868 OSM_LOG(sm->p_log, OSM_LOG_INFO,
869 "Discovered new port with GUID:0x%016" PRIx64
870 " LID range [%u,%u] of node:%s\n",
871 cl_ntoh64(port_gid.unicast.interface_id),
872 min_lid_ho, max_lid_ho,
873 p_port->p_node ? p_port->p_node->
874 print_desc : "UNKNOWN");
878 CL_PLOCK_RELEASE(sm->p_lock);
880 OSM_LOG_EXIT(sm->p_log);
883 /**********************************************************************
884 * Make sure that the lid_port_tbl of the subnet has only the ports
885 * that are recognized, and in the correct lid place. There could be
886 * errors if we wanted to assign a certain port with lid X, but that
887 * request didn't reach the port. In this case port_lid_tbl will have
888 * the port under lid X, though the port isn't updated with this lid.
889 * We will run a new heavy sweep (since there were errors in the
890 * initialization), but here we'll clean the database from incorrect
892 **********************************************************************/
893 static void __osm_state_mgr_check_tbl_consistency(IN osm_sm_t * sm)
895 cl_qmap_t *p_port_guid_tbl;
897 osm_port_t *p_next_port;
898 cl_ptr_vector_t *p_port_lid_tbl;
899 size_t max_lid, ref_size, curr_size, lid;
900 osm_port_t *p_port_ref, *p_port_stored;
901 cl_ptr_vector_t ref_port_lid_tbl;
906 OSM_LOG_ENTER(sm->p_log);
908 cl_ptr_vector_construct(&ref_port_lid_tbl);
909 cl_ptr_vector_init(&ref_port_lid_tbl,
910 cl_ptr_vector_get_size(&sm->p_subn->port_lid_tbl),
911 OSM_SUBNET_VECTOR_GROW_SIZE);
913 p_port_guid_tbl = &sm->p_subn->port_guid_tbl;
915 /* Let's go over all the ports according to port_guid_tbl,
916 * and add the port to a reference port_lid_tbl. */
917 p_next_port = (osm_port_t *) cl_qmap_head(p_port_guid_tbl);
918 while (p_next_port != (osm_port_t *) cl_qmap_end(p_port_guid_tbl)) {
919 p_port = p_next_port;
921 (osm_port_t *) cl_qmap_next(&p_next_port->map_item);
923 osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho);
924 for (lid_ho = min_lid_ho; lid_ho <= max_lid_ho; lid_ho++)
925 cl_ptr_vector_set(&ref_port_lid_tbl, lid_ho, p_port);
928 p_port_lid_tbl = &sm->p_subn->port_lid_tbl;
930 ref_size = cl_ptr_vector_get_size(&ref_port_lid_tbl);
931 curr_size = cl_ptr_vector_get_size(p_port_lid_tbl);
932 /* They should be the same, but compare it anyway */
933 max_lid = (ref_size > curr_size) ? ref_size : curr_size;
935 for (lid = 1; lid <= max_lid; lid++) {
937 p_port_stored = NULL;
938 cl_ptr_vector_at(p_port_lid_tbl, lid, (void *)&p_port_stored);
939 cl_ptr_vector_at(&ref_port_lid_tbl, lid, (void *)&p_port_ref);
941 if (p_port_stored == p_port_ref)
942 /* This is the "good" case - both entries are the
943 * same for this lid. Nothing to do. */
946 if (p_port_ref == NULL)
947 /* There is an object in the subnet database for this
948 * lid, but no such object exists in the reference
949 * port_list_tbl. This can occur if we wanted to assign
950 * a certain port with some lid (different than the one
951 * pre-assigned to it), and the port didn't get the
952 * PortInfo Set request. Due to this, the port is
953 * updated with its original lid in our database, but
954 * with the new lid we wanted to give it in our
956 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3322: "
957 "lid %zu is wrongly assigned to port 0x%016"
958 PRIx64 " (\'%s\' port %u) in port_lid_tbl\n",
960 cl_ntoh64(osm_port_get_guid(p_port_stored)),
961 p_port_stored->p_node->print_desc,
962 p_port_stored->p_physp->port_num);
963 else if (p_port_stored == NULL)
964 /* There is an object in the new database, but no
965 * object in our subnet database. This is the matching
966 * case of the prior check - the port still has its
968 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3323: "
969 "port 0x%016" PRIx64 " (\'%s\' port %u)"
970 " exists in new port_lid_tbl under lid %zu,"
971 " but missing in subnet port_lid_tbl db\n",
972 cl_ntoh64(osm_port_get_guid(p_port_ref)),
973 p_port_ref->p_node->print_desc,
974 p_port_ref->p_physp->port_num, lid);
976 /* if we reached here then p_port_stored != p_port_ref.
977 * We were trying to set a lid to p_port_stored, but
978 * it didn't reach it, and p_port_ref also didn't get
980 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3324: "
981 "lid %zu has port 0x%016" PRIx64
982 " (\'%s\' port %u) in new port_lid_tbl db, "
983 "and port 0x%016" PRIx64 " (\'%s\' port %u)"
984 " in subnet port_lid_tbl db\n", lid,
985 cl_ntoh64(osm_port_get_guid(p_port_ref)),
986 p_port_ref->p_node->print_desc,
987 p_port_ref->p_physp->port_num,
988 cl_ntoh64(osm_port_get_guid(p_port_stored)),
989 p_port_ref->p_node->print_desc,
990 p_port_ref->p_physp->port_num);
992 /* In any of these cases we want to set NULL in the
993 * port_lid_tbl, since this entry is invalid. Also, make sure
994 * we'll do another heavy sweep. */
995 cl_ptr_vector_set(p_port_lid_tbl, lid, NULL);
996 sm->p_subn->subnet_initialization_error = TRUE;
999 cl_ptr_vector_destroy(&ref_port_lid_tbl);
1000 OSM_LOG_EXIT(sm->p_log);
1003 static void cleanup_switch(cl_map_item_t *item, void *log)
1005 osm_switch_t *sw = (osm_switch_t *)item;
1010 if (memcmp(sw->lft, sw->new_lft, IB_LID_UCAST_END_HO + 1))
1011 osm_log(log, OSM_LOG_ERROR, "ERR 331D: "
1012 "LFT of switch 0x%016" PRIx64 " is not up to date.\n",
1013 cl_ntoh64(sw->p_node->node_info.node_guid));
1020 /**********************************************************************
1021 **********************************************************************/
1022 int wait_for_pending_transactions(osm_stats_t * stats)
1024 #ifdef HAVE_LIBPTHREAD
1025 pthread_mutex_lock(&stats->mutex);
1026 while (stats->qp0_mads_outstanding && !osm_exit_flag)
1027 pthread_cond_wait(&stats->cond, &stats->mutex);
1028 pthread_mutex_unlock(&stats->mutex);
1031 unsigned count = stats->qp0_mads_outstanding;
1032 if (!count || osm_exit_flag)
1034 cl_event_wait_on(&stats->event, EVENT_NO_TIMEOUT, TRUE);
1037 return osm_exit_flag;
1040 static void do_sweep(osm_sm_t * sm)
1042 ib_api_status_t status;
1043 osm_remote_sm_t *p_remote_sm;
1045 if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER &&
1046 sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING)
1049 if (sm->p_subn->coming_out_of_standby)
1051 * Need to force re-write of sm_base_lid to all ports
1052 * to do that we want all the ports to be considered
1055 __osm_state_mgr_clean_known_lids(sm);
1057 sm->master_sm_found = 0;
1060 * If we already have switches, then try a light sweep.
1061 * Otherwise, this is probably our first discovery pass
1062 * or we are connected in loopback. In both cases do a
1064 * Note: If we are connected in loopback we want a heavy
1065 * sweep, since we will not be getting any traps if there is
1066 * a lost connection.
1068 /* if we are in DISCOVERING state - this means it is either in
1069 * initializing or wake up from STANDBY - run the heavy sweep */
1070 if (cl_qmap_count(&sm->p_subn->sw_guid_tbl)
1071 && sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING
1072 && sm->p_subn->opt.force_heavy_sweep == FALSE
1073 && sm->p_subn->force_heavy_sweep == FALSE
1074 && sm->p_subn->force_reroute == FALSE
1075 && sm->p_subn->subnet_initialization_error == FALSE
1076 && (__osm_state_mgr_light_sweep_start(sm) == IB_SUCCESS)) {
1077 if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
1079 if (!sm->p_subn->force_heavy_sweep) {
1080 OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
1081 "LIGHT SWEEP COMPLETE");
1087 * Unicast cache should be invalidated if there were errors
1088 * during initialization or if subnet re-route is requested.
1090 if (sm->p_subn->opt.use_ucast_cache &&
1091 (sm->p_subn->subnet_initialization_error ||
1092 sm->p_subn->force_reroute))
1093 osm_ucast_cache_invalidate(&sm->ucast_mgr);
1096 * If we don't need to do a heavy sweep and we want to do a reroute,
1097 * just reroute only.
1099 if (cl_qmap_count(&sm->p_subn->sw_guid_tbl)
1100 && sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING
1101 && sm->p_subn->opt.force_heavy_sweep == FALSE
1102 && sm->p_subn->force_heavy_sweep == FALSE
1103 && sm->p_subn->force_reroute == TRUE
1104 && sm->p_subn->subnet_initialization_error == FALSE) {
1106 sm->p_subn->force_reroute = FALSE;
1108 /* Re-program the switches fully */
1109 sm->p_subn->ignore_existing_lfts = TRUE;
1111 osm_ucast_mgr_process(&sm->ucast_mgr);
1114 sm->p_subn->ignore_existing_lfts = FALSE;
1116 if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
1119 if (!sm->p_subn->subnet_initialization_error) {
1120 OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
1121 "REROUTE COMPLETE");
1126 /* go to heavy sweep */
1129 /* First of all - unset all flags */
1130 sm->p_subn->force_heavy_sweep = FALSE;
1131 sm->p_subn->force_reroute = FALSE;
1132 sm->p_subn->subnet_initialization_error = FALSE;
1134 /* rescan configuration updates */
1135 if (osm_subn_rescan_conf_files(sm->p_subn) < 0)
1136 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 331A: "
1137 "osm_subn_rescan_conf_file failed\n");
1139 if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER)
1140 sm->p_subn->need_update = 1;
1142 status = __osm_state_mgr_sweep_hop_0(sm);
1143 if (status != IB_SUCCESS ||
1144 wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
1147 if (__osm_state_mgr_is_sm_port_down(sm) == TRUE) {
1148 osm_log(sm->p_log, OSM_LOG_SYS, "SM port is down\n");
1149 OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, "SM PORT DOWN");
1151 /* Run the drop manager - we want to clear all records */
1152 osm_drop_mgr_process(sm);
1154 /* Move to DISCOVERING state */
1155 osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_DISCOVER);
1159 status = __osm_state_mgr_sweep_hop_1(sm);
1160 if (status != IB_SUCCESS ||
1161 wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
1164 /* discovery completed - check other sm presense */
1165 if (sm->master_sm_found) {
1167 * Call the sm_state_mgr with signal
1168 * MASTER_OR_HIGHER_SM_DETECTED_DONE
1170 osm_sm_state_mgr_process(sm,
1171 OSM_SM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED);
1172 OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
1173 "ENTERING STANDBY STATE");
1174 /* notify master SM about us */
1175 osm_send_trap144(sm, 0);
1179 /* if new sweep requested - don't bother with the rest */
1180 if (sm->p_subn->force_heavy_sweep)
1181 goto _repeat_discovery;
1183 OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, "HEAVY SWEEP COMPLETE");
1185 /* If we are MASTER - get the highest remote_sm, and
1186 * see if it is higher than our local sm.
1188 if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER) {
1189 p_remote_sm = __osm_state_mgr_get_highest_sm(sm);
1190 if (p_remote_sm != NULL) {
1191 /* report new ports (trap 64) before leaving MASTER */
1192 __osm_state_mgr_report_new_ports(sm);
1194 /* need to handover the mastership
1195 * to the remote sm, and move to standby */
1196 __osm_state_mgr_send_handover(sm, p_remote_sm);
1197 osm_sm_state_mgr_process(sm,
1198 OSM_SM_SIGNAL_HANDOVER_SENT);
1201 /* We are the highest sm - check to see if there is
1202 * a remote SM that is in master state. */
1204 __osm_state_mgr_exists_other_master_sm(sm);
1205 if (p_remote_sm != NULL) {
1206 /* There is a remote SM that is master.
1207 * need to wait for that SM to relinquish control
1208 * of its portion of the subnet. C14-60.2.1.
1209 * Also - need to start polling on that SM. */
1210 sm->p_polling_sm = p_remote_sm;
1211 osm_sm_state_mgr_process(sm,
1212 OSM_SM_SIGNAL_WAIT_FOR_HANDOVER);
1218 /* Need to continue with lid assignment */
1219 osm_drop_mgr_process(sm);
1222 * If we are not MASTER already - this means that we are
1223 * in discovery state. call osm_sm_state_mgr with signal
1224 * DISCOVERY_COMPLETED
1226 if (sm->p_subn->sm_state == IB_SMINFO_STATE_DISCOVERING)
1227 osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_DISCOVERY_COMPLETED);
1229 osm_pkey_mgr_process(sm->p_subn->p_osm);
1231 osm_qos_setup(sm->p_subn->p_osm);
1233 /* try to restore SA DB (this should be before lid_mgr
1234 because we may want to disable clients reregistration
1235 when SA DB is restored) */
1236 osm_sa_db_file_load(sm->p_subn->p_osm);
1238 if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
1241 osm_lid_mgr_process_sm(&sm->lid_mgr);
1242 if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
1245 OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
1246 "SM LID ASSIGNMENT COMPLETE - STARTING SUBNET LID CONFIG");
1247 __osm_state_mgr_notify_lid_change(sm);
1249 osm_lid_mgr_process_subnet(&sm->lid_mgr);
1250 if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
1253 /* At this point we need to check the consistency of
1254 * the port_lid_tbl under the subnet. There might be
1255 * errors in it if PortInfo Set requests didn't reach
1256 * their destination. */
1257 __osm_state_mgr_check_tbl_consistency(sm);
1259 OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
1260 "LID ASSIGNMENT COMPLETE - STARTING SWITCH TABLE CONFIG");
1263 * Proceed with unicast forwarding table configuration.
1266 if (!sm->ucast_mgr.cache_valid ||
1267 osm_ucast_cache_process(&sm->ucast_mgr))
1268 osm_ucast_mgr_process(&sm->ucast_mgr);
1270 if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
1273 /* cleanup switch lft buffers */
1274 cl_qmap_apply_func(&sm->p_subn->sw_guid_tbl, cleanup_switch, sm->p_log);
1276 /* We are done setting all LFTs so clear the ignore existing.
1277 * From now on, as long as we are still master, we want to
1278 * take into account these lfts. */
1279 sm->p_subn->ignore_existing_lfts = FALSE;
1281 OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
1282 "SWITCHES CONFIGURED FOR UNICAST");
1284 if (!sm->p_subn->opt.disable_multicast) {
1285 osm_mcast_mgr_process(sm);
1286 if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
1288 OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
1289 "SWITCHES CONFIGURED FOR MULTICAST");
1293 * The LINK_PORTS state is required since we cannot count on
1294 * the port state change MADs to succeed. This is an artifact
1295 * of the spec defining state change from state X to state X
1296 * as an error. The hardware then is not required to process
1297 * other parameters provided by the Set(PortInfo) Packet.
1300 osm_link_mgr_process(sm, IB_LINK_NO_CHANGE);
1301 if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
1304 OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
1305 "LINKS PORTS CONFIGURED - SET LINKS TO ARMED STATE");
1307 osm_link_mgr_process(sm, IB_LINK_ARMED);
1308 if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
1311 OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
1312 "LINKS ARMED - SET LINKS TO ACTIVE STATE");
1314 osm_link_mgr_process(sm, IB_LINK_ACTIVE);
1315 if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
1319 * The sweep completed!
1323 * Send trap 64 on newly discovered endports
1325 __osm_state_mgr_report_new_ports(sm);
1327 /* in any case we zero this flag */
1328 sm->p_subn->coming_out_of_standby = FALSE;
1330 /* If there were errors - then the subnet is not really up */
1331 if (sm->p_subn->subnet_initialization_error == TRUE) {
1332 osm_log(sm->p_log, OSM_LOG_SYS,
1333 "Errors during initialization\n");
1334 OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_ERROR,
1335 "ERRORS DURING INITIALIZATION");
1337 sm->p_subn->need_update = 0;
1338 osm_dump_all(sm->p_subn->p_osm);
1339 __osm_state_mgr_up_msg(sm);
1340 sm->p_subn->first_time_master_sweep = FALSE;
1342 if (osm_log_is_active(sm->p_log, OSM_LOG_VERBOSE))
1343 osm_sa_db_file_dump(sm->p_subn->p_osm);
1347 * Finally signal the subnet up event
1349 cl_event_signal(&sm->subnet_up_event);
1351 osm_opensm_report_event(sm->p_subn->p_osm, OSM_EVENT_ID_SUBNET_UP, NULL);
1353 /* if we got a signal to force heavy sweep or errors
1354 * in the middle of the sweep - try another sweep. */
1355 if (sm->p_subn->force_heavy_sweep
1356 || sm->p_subn->subnet_initialization_error)
1357 osm_sm_signal(sm, OSM_SIGNAL_SWEEP);
1360 static void do_process_mgrp_queue(osm_sm_t * sm)
1362 if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER)
1364 osm_mcast_mgr_process_mgroups(sm);
1365 wait_for_pending_transactions(&sm->p_subn->p_osm->stats);
1368 void osm_state_mgr_process(IN osm_sm_t * sm, IN osm_signal_t signal)
1372 OSM_LOG_ENTER(sm->p_log);
1374 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
1375 "Received signal %s in state %s\n",
1376 osm_get_sm_signal_str(signal),
1377 osm_get_sm_mgr_state_str(sm->p_subn->sm_state));
1380 case OSM_SIGNAL_SWEEP:
1384 case OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST:
1385 do_process_mgrp_queue(sm);
1390 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3320: "
1391 "Invalid SM signal %u\n", signal);
1395 OSM_LOG_EXIT(sm->p_log);