2 * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved.
4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5 * Copyright (c) 2008 Xsigo Systems Inc. All rights reserved.
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
39 * Implementation of osm_drop_mgr_t.
40 * This object represents the Drop Manager object.
41 * This object is part of the opensm family of objects.
46 #endif /* HAVE_CONFIG_H */
50 #include <iba/ib_types.h>
51 #include <complib/cl_qmap.h>
52 #include <complib/cl_passivelock.h>
53 #include <complib/cl_debug.h>
54 #include <complib/cl_ptr_vector.h>
55 #include <opensm/osm_sm.h>
56 #include <opensm/osm_router.h>
57 #include <opensm/osm_switch.h>
58 #include <opensm/osm_node.h>
59 #include <opensm/osm_helper.h>
60 #include <opensm/osm_mcm_info.h>
61 #include <opensm/osm_multicast.h>
62 #include <opensm/osm_remote_sm.h>
63 #include <opensm/osm_inform.h>
64 #include <opensm/osm_ucast_mgr.h>
66 /**********************************************************************
67 **********************************************************************/
/*
 * Drop the router object registered under portguid, if there is one.
 *
 * The key is removed from the subnet's router GUID map
 * (sm->p_subn->rtr_guid_tbl). When cl_qmap_remove() hands back something
 * other than the map's end marker, the removal is logged at VERBOSE level
 * and the object is passed to osm_router_delete().
 *
 * sm       - SM object; provides the subnet (p_subn) and the log (p_log).
 * portguid - map key of the router entry to remove.
 *
 * NOTE(review): this listing does not show the return type, the braces,
 * the declaration of p_rtr or the last OSM_LOG argument - confirm them
 * against the complete file.
 */
69 __osm_drop_mgr_remove_router(osm_sm_t * sm, IN const ib_net64_t portguid)
72 cl_qmap_t *p_rtr_guid_tbl;
74 p_rtr_guid_tbl = &sm->p_subn->rtr_guid_tbl;
/* The result is compared with cl_qmap_end() to tell a hit from a miss. */
75 p_rtr = (osm_router_t *) cl_qmap_remove(p_rtr_guid_tbl, portguid);
76 if (p_rtr != (osm_router_t *) cl_qmap_end(p_rtr_guid_tbl)) {
77 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
78 "Cleaned router for port guid 0x%016" PRIx64 "\n",
80 osm_router_delete(&p_rtr);
84 /**********************************************************************
85 **********************************************************************/
/*
 * Detach a physical port from its link partner and destroy it.
 *
 * Steps visible in this listing:
 *  - fetch the remote physical port with osm_physp_get_remote() and look up
 *    the remote port object by the remote port GUID;
 *  - if the remote port has a non-zero discovery count and its port state
 *    matches the compared value, set sm->p_subn->force_heavy_sweep;
 *  - if the remote node has no switch object (p_node->sw is NULL), reset the
 *    remote port's discovery_count to 0;
 *  - when sm->ucast_mgr.cache_valid is set, hand the link to
 *    osm_ucast_cache_add_link(), then call osm_physp_unlink();
 *  - finally log and call osm_physp_destroy() on p_physp.
 *
 * sm      - SM object; provides the subnet, the log and the ucast manager.
 * p_physp - physical port to clean up.
 *
 * NOTE(review): the conditions guarding the remote-port handling (presumably
 * NULL checks on p_remote_physp / p_remote_port) and the state constant the
 * port state is compared with are not visible here - confirm in the full
 * file. The in-code comment and the log text indicate the ACTIVE state.
 */
86 static void drop_mgr_clean_physp(osm_sm_t * sm, IN osm_physp_t * p_physp)
88 osm_physp_t *p_remote_physp;
89 osm_port_t *p_remote_port;
91 p_remote_physp = osm_physp_get_remote(p_physp);
93 p_remote_port = osm_get_port_by_guid(sm->p_subn,
94 p_remote_physp->port_guid);
97 /* Let's check if this is a case of link that is lost (both ports
98 weren't recognized), or a "hiccup" in the subnet - in which case
99 the remote port was recognized, and its state is ACTIVE.
100 If this is just a "hiccup" - force a heavy sweep in the next sweep.
101 We don't want to lose that part of the subnet. */
102 if (p_remote_port->discovery_count &&
103 osm_physp_get_port_state(p_remote_physp) ==
105 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
106 "Forcing new heavy sweep. Remote "
107 "port 0x%016" PRIx64 " port num: %u "
108 "was recognized in ACTIVE state\n",
109 cl_ntoh64(p_remote_physp->port_guid),
110 p_remote_physp->port_num);
111 sm->p_subn->force_heavy_sweep = TRUE;
114 /* If the remote node is ca or router - need to remove the remote port,
115 since it is no longer reachable. This can be done if we reset the
116 discovery count of the remote port. */
117 if (!p_remote_physp->p_node->sw) {
118 p_remote_port->discovery_count = 0;
119 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
120 "Resetting discovery count of node: "
121 "0x%016" PRIx64 " port num:%u\n",
122 cl_ntoh64(osm_node_get_node_guid
123 (p_remote_physp->p_node)),
124 p_remote_physp->port_num);
128 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
129 "Unlinking local node 0x%016" PRIx64 ", port %u"
130 "\n\t\t\t\tand remote node 0x%016" PRIx64
132 cl_ntoh64(osm_node_get_node_guid(p_physp->p_node)),
134 cl_ntoh64(osm_node_get_node_guid
135 (p_remote_physp->p_node)),
136 p_remote_physp->port_num);
/* The link is handed to the unicast cache before it is unlinked below,
   and only while the cache is flagged valid. */
138 if (sm->ucast_mgr.cache_valid)
139 osm_ucast_cache_add_link(&sm->ucast_mgr,
140 p_physp, p_remote_physp);
142 osm_physp_unlink(p_physp, p_remote_physp);
146 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
147 "Clearing node 0x%016" PRIx64 " physical port number %u\n",
148 cl_ntoh64(osm_node_get_node_guid(p_physp->p_node)),
151 osm_physp_destroy(p_physp);
154 /**********************************************************************
155 **********************************************************************/
/*
 * Remove an unreachable port from the subnet and report trap 65.
 *
 * Steps visible in this listing:
 *  - remove the port from sm->p_subn->port_guid_tbl; log ERR 0101 when the
 *    removed item is not p_port;
 *  - remove any remote-SM entry with the same GUID from sm_guid_tbl;
 *  - remove any router entry via __osm_drop_mgr_remove_router();
 *  - set every entry of the port's LID range in port_lid_tbl to NULL;
 *  - clean the port's physical port with drop_mgr_clean_physp();
 *  - drain p_port->mcm_list, deleting the port from each multicast group
 *    found by MLID and freeing the list item;
 *  - remember p_port->p_node, then delete the port object;
 *  - fill in a generic notice (trap number 65) carrying the port GID and
 *    the SM as issuer, and pass it to osm_report_notice(); log ERR 0103 on
 *    a status other than IB_SUCCESS;
 *  - log the removal at INFO level.
 *
 * sm     - SM object; provides the subnet, the log and SM addressing data.
 * p_port - port to remove; it is deleted by this function.
 *
 * Fix: both calls that take the notice now pass &notice. The listing had
 * the address-of expression mis-encoded as a "not sign" followed by "ice".
 *
 * NOTE(review): several lines are not visible in this listing (some local
 * declarations, braces, the early-exit path after ERR 0101, the release of
 * p_sm, the guard on p_mgrp) - confirm them against the complete file.
 */
156 static void __osm_drop_mgr_remove_port(osm_sm_t * sm, IN osm_port_t * p_port)
158 ib_net64_t port_guid;
159 osm_port_t *p_port_check;
160 cl_qmap_t *p_sm_guid_tbl;
161 osm_mcm_info_t *p_mcm;
163 cl_ptr_vector_t *p_port_lid_tbl;
168 osm_remote_sm_t *p_sm;
170 ib_mad_notice_attr_t notice;
171 ib_api_status_t status;
173 OSM_LOG_ENTER(sm->p_log);
175 port_guid = osm_port_get_guid(p_port);
176 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
177 "Unreachable port 0x%016" PRIx64 "\n", cl_ntoh64(port_guid));
180 (osm_port_t *) cl_qmap_remove(&sm->p_subn->port_guid_tbl,
182 if (p_port_check != p_port) {
183 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0101: "
184 "Port 0x%016" PRIx64 " not in guid table\n",
185 cl_ntoh64(port_guid));
189 p_sm_guid_tbl = &sm->p_subn->sm_guid_tbl;
190 p_sm = (osm_remote_sm_t *) cl_qmap_remove(p_sm_guid_tbl, port_guid);
191 if (p_sm != (osm_remote_sm_t *) cl_qmap_end(p_sm_guid_tbl)) {
192 /* need to remove this item */
193 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
194 "Cleaned SM for port guid 0x%016" PRIx64 "\n",
195 cl_ntoh64(port_guid));
200 __osm_drop_mgr_remove_router(sm, port_guid);
202 osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho);
204 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
205 "Clearing abandoned LID range [%u,%u]\n",
206 min_lid_ho, max_lid_ho);
208 p_port_lid_tbl = &sm->p_subn->port_lid_tbl;
209 for (lid_ho = min_lid_ho; lid_ho <= max_lid_ho; lid_ho++)
210 cl_ptr_vector_set(p_port_lid_tbl, lid_ho, NULL);
212 drop_mgr_clean_physp(sm, p_port->p_physp);
214 p_mcm = (osm_mcm_info_t *) cl_qlist_remove_head(&p_port->mcm_list);
215 while (p_mcm != (osm_mcm_info_t *) cl_qlist_end(&p_port->mcm_list)) {
216 p_mgrp = osm_get_mgrp_by_mlid(sm->p_subn, p_mcm->mlid);
218 osm_mgrp_delete_port(sm->p_subn, sm->p_log,
219 p_mgrp, p_port->guid);
220 osm_mcm_info_delete((osm_mcm_info_t *) p_mcm);
223 (osm_mcm_info_t *) cl_qlist_remove_head(&p_port->mcm_list);
226 /* initialize the p_node - may need to get node_desc later */
227 p_node = p_port->p_node;
229 osm_port_delete(&p_port);
231 /* issue a notice - trap 65 */
233 /* details of the notice */
234 notice.generic_type = 0x83; /* is generic subn mgt type */
235 ib_notice_set_prod_type_ho(&notice, 4); /* A class manager generator */
236 /* endport ceases to be reachable */
237 notice.g_or_v.generic.trap_num = CL_HTON16(65);
238 /* The sm_base_lid is saved in network order already. */
239 notice.issuer_lid = sm->p_subn->sm_base_lid;
240 /* following C14-72.1.2 and table 119 p725 */
241 /* we need to provide the GID */
242 port_gid.unicast.prefix = sm->p_subn->opt.subnet_prefix;
243 port_gid.unicast.interface_id = port_guid;
244 memcpy(&(notice.data_details.ntc_64_67.gid),
245 &(port_gid), sizeof(ib_gid_t));
247 /* According to page 653 - the issuer gid in this case of trap
248 is the SM gid, since the SM is the initiator of this trap. */
249 notice.issuer_gid.unicast.prefix = sm->p_subn->opt.subnet_prefix;
250 notice.issuer_gid.unicast.interface_id = sm->p_subn->sm_port_guid;
252 status = osm_report_notice(sm->p_log, sm->p_subn, &notice);
253 if (status != IB_SUCCESS) {
254 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0103: "
255 "Error sending trap reports (%s)\n",
256 ib_get_err_str(status));
/* p_port was deleted above, so the GUID is taken from port_gid and the
   node description from the p_node pointer saved before the delete. */
260 OSM_LOG(sm->p_log, OSM_LOG_INFO,
261 "Removed port with GUID:0x%016" PRIx64
262 " LID range [%u, %u] of node:%s\n",
263 cl_ntoh64(port_gid.unicast.interface_id),
264 min_lid_ho, max_lid_ho,
265 p_node ? p_node->print_desc : "UNKNOWN");
268 OSM_LOG_EXIT(sm->p_log);
271 /**********************************************************************
272 **********************************************************************/
/*
 * Remove the switch object that belongs to p_node from the subnet's switch
 * GUID map (sm->p_subn->sw_guid_tbl), keyed by the node GUID.
 *
 * When cl_qmap_remove() returns the map's end marker, ERR 0102 is logged.
 * The listing also shows a call to osm_switch_delete() on the removed item.
 *
 * sm     - SM object; provides the subnet and the log.
 * p_node - node whose switch entry is to be removed.
 *
 * NOTE(review): the declaration of p_sw, the braces and the branch that
 * separates the error log from osm_switch_delete() are not visible here;
 * presumably the delete runs only when the entry was found - confirm.
 */
273 static void __osm_drop_mgr_remove_switch(osm_sm_t * sm, IN osm_node_t * p_node)
276 cl_qmap_t *p_sw_guid_tbl;
277 ib_net64_t node_guid;
279 OSM_LOG_ENTER(sm->p_log);
281 node_guid = osm_node_get_node_guid(p_node);
282 p_sw_guid_tbl = &sm->p_subn->sw_guid_tbl;
/* A result equal to cl_qmap_end() means the GUID was not in the map. */
284 p_sw = (osm_switch_t *) cl_qmap_remove(p_sw_guid_tbl, node_guid);
285 if (p_sw == (osm_switch_t *) cl_qmap_end(p_sw_guid_tbl)) {
286 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0102: "
287 "Node 0x%016" PRIx64 " not in switch table\n",
288 cl_ntoh64(osm_node_get_node_guid(p_node)));
291 osm_switch_delete(&p_sw);
294 OSM_LOG_EXIT(sm->p_log);
297 /**********************************************************************
298 **********************************************************************/
/*
 * Remove an unreachable node and everything attached to it.
 *
 * Steps visible in this listing:
 *  - when sm->ucast_mgr.cache_valid is set, add the node to the unicast
 *    cache with osm_ucast_cache_add_node();
 *  - walk port numbers 0 .. osm_node_get_num_physp()-1, look up the port
 *    object by the physical port's GUID, and call
 *    __osm_drop_mgr_remove_port() and drop_mgr_clean_physp();
 *  - call __osm_drop_mgr_remove_switch() for the node;
 *  - remove the node from sm->p_subn->node_guid_tbl, logging ERR 0105 when
 *    the removed item is not p_node;
 *  - free the node with osm_node_delete().
 *
 * sm     - SM object; provides the subnet, the log and the ucast manager.
 * p_node - node to remove; it is deleted by this function.
 *
 * NOTE(review): the return type, the return statement, several local
 * declarations and the conditions selecting between remove_port and
 * clean_physp (and guarding remove_switch) are not visible in this listing.
 * A boolean_t return_val initialised to FALSE is declared, so the function
 * presumably returns a boolean - confirm in the complete file.
 */
300 __osm_drop_mgr_process_node(osm_sm_t * sm, IN osm_node_t * p_node)
302 osm_physp_t *p_physp;
304 osm_node_t *p_node_check;
307 ib_net64_t port_guid;
308 boolean_t return_val = FALSE;
310 OSM_LOG_ENTER(sm->p_log);
312 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
313 "Unreachable node 0x%016" PRIx64 "\n",
314 cl_ntoh64(osm_node_get_node_guid(p_node)));
/* The node is handed to the unicast cache before any of it is torn down. */
316 if (sm->ucast_mgr.cache_valid)
317 osm_ucast_cache_add_node(&sm->ucast_mgr, p_node);
320 Delete all the logical and physical port objects
321 associated with this node.
323 max_ports = osm_node_get_num_physp(p_node);
324 for (port_num = 0; port_num < max_ports; port_num++) {
325 p_physp = osm_node_get_physp_ptr(p_node, port_num);
327 port_guid = osm_physp_get_port_guid(p_physp);
329 p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
332 __osm_drop_mgr_remove_port(sm, p_port);
334 drop_mgr_clean_physp(sm, p_physp);
341 __osm_drop_mgr_remove_switch(sm, p_node);
344 (osm_node_t *) cl_qmap_remove(&sm->p_subn->node_guid_tbl,
345 osm_node_get_node_guid(p_node));
346 if (p_node_check != p_node) {
347 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0105: "
348 "Node 0x%016" PRIx64 " not in guid table\n",
349 cl_ntoh64(osm_node_get_node_guid(p_node)));
352 /* free memory allocated to node */
353 osm_node_delete(&p_node);
355 OSM_LOG_EXIT(sm->p_log);
359 /**********************************************************************
360 **********************************************************************/
/*
 * Verify that a switch node was fully discovered and drop it otherwise.
 *
 * Checks visible in this listing, in order:
 *  - the node type must be IB_NODE_TYPE_SWITCH, otherwise ERR 0107 is
 *    logged;
 *  - a switch object must exist for the node;
 *  - physical port 0 must be present;
 *  - a port object must exist for port 0's GUID;
 *  - that port's discovery_count must be non-zero.
 * Each failed check after the first logs a VERBOSE message and calls
 * __osm_drop_mgr_process_node() on the node.
 *
 * sm     - SM object; provides the subnet and the log.
 * p_node - node to verify; may be removed by this call.
 *
 * NOTE(review): the conditions for the switch / physp / port checks and the
 * exits taken after each failure are not visible in this listing - confirm
 * them against the complete file.
 */
361 static void __osm_drop_mgr_check_node(osm_sm_t * sm, IN osm_node_t * p_node)
363 ib_net64_t node_guid;
364 osm_physp_t *p_physp;
366 ib_net64_t port_guid;
368 OSM_LOG_ENTER(sm->p_log);
370 node_guid = osm_node_get_node_guid(p_node);
/* This helper only makes sense for switches; anything else is an error. */
372 if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH) {
373 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0107: "
374 "Node 0x%016" PRIx64 " is not a switch node\n",
375 cl_ntoh64(node_guid));
379 /* Make sure we have a switch object for this node */
381 /* We do not have switch info for this node */
382 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
383 "Node 0x%016" PRIx64 " no switch in table\n",
384 cl_ntoh64(node_guid));
386 __osm_drop_mgr_process_node(sm, p_node);
390 /* Make sure we have a port object for port zero */
391 p_physp = osm_node_get_physp_ptr(p_node, 0);
393 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
394 "Node 0x%016" PRIx64 " no valid physical port 0\n",
395 cl_ntoh64(node_guid));
397 __osm_drop_mgr_process_node(sm, p_node);
401 port_guid = osm_physp_get_port_guid(p_physp);
403 p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
406 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
407 "Node 0x%016" PRIx64 " has no port object\n",
408 cl_ntoh64(node_guid));
410 __osm_drop_mgr_process_node(sm, p_node);
/* A port 0 that was not seen in the last sweep also causes a drop. */
414 if (p_port->discovery_count == 0) {
415 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
416 "Node 0x%016" PRIx64 " port has discovery count zero\n",
417 cl_ntoh64(node_guid));
419 __osm_drop_mgr_process_node(sm, p_node);
424 OSM_LOG_EXIT(sm->p_log);
428 /**********************************************************************
429 **********************************************************************/
/*
 * Entry point of the drop manager: purge everything that the last sweep
 * did not discover.
 *
 * The whole operation runs between CL_PLOCK_EXCL_ACQUIRE(sm->p_lock) and
 * CL_PLOCK_RELEASE(sm->p_lock). Three passes are visible:
 *  1. every node in node_guid_tbl whose discovery_count is 0 is handed to
 *     __osm_drop_mgr_process_node();
 *  2. every remaining node of type IB_NODE_TYPE_SWITCH is handed to
 *     __osm_drop_mgr_check_node();
 *  3. every port in port_guid_tbl whose discovery_count is 0 is handed to
 *     __osm_drop_mgr_remove_port().
 *
 * sm - SM object; provides the subnet tables, the lock and the log.
 *
 * NOTE(review): loop braces, some local declarations, the comment
 * delimiters around the explanatory prose and the statement taken for
 * non-switch nodes in pass 2 are not visible in this listing - confirm
 * them against the complete file.
 */
430 void osm_drop_mgr_process(osm_sm_t * sm)
432 cl_qmap_t *p_node_guid_tbl;
433 cl_qmap_t *p_port_guid_tbl;
435 osm_port_t *p_next_port;
437 osm_node_t *p_next_node;
441 OSM_LOG_ENTER(sm->p_log);
443 p_node_guid_tbl = &sm->p_subn->node_guid_tbl;
444 p_port_guid_tbl = &sm->p_subn->port_guid_tbl;
446 CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
448 p_next_node = (osm_node_t *) cl_qmap_head(p_node_guid_tbl);
449 while (p_next_node != (osm_node_t *) cl_qmap_end(p_node_guid_tbl)) {
/* The successor is fetched before the current node is processed, because
   processing may remove the current node from the map. */
450 p_node = p_next_node;
452 (osm_node_t *) cl_qmap_next(&p_next_node->map_item);
454 CL_ASSERT(cl_qmap_key(&p_node->map_item) ==
455 osm_node_get_node_guid(p_node));
457 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
458 "Checking node 0x%016" PRIx64 "\n",
459 cl_ntoh64(osm_node_get_node_guid(p_node)));
462 Check if this node was discovered during the last sweep.
463 If not, it is unreachable in the current subnet, and
464 should therefore be removed from the subnet object.
466 if (p_node->discovery_count == 0)
467 __osm_drop_mgr_process_node(sm, p_node);
471 Go over all the nodes. If the node is a switch - make sure
472 there is also a switch record for it, and a portInfo record for
473 port zero of of the node.
474 If not - this means that there was some error in getting the data
475 of this node. Drop the node.
477 p_next_node = (osm_node_t *) cl_qmap_head(p_node_guid_tbl);
478 while (p_next_node != (osm_node_t *) cl_qmap_end(p_node_guid_tbl)) {
479 p_node = p_next_node;
481 (osm_node_t *) cl_qmap_next(&p_next_node->map_item);
483 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
484 "Checking full discovery of node 0x%016" PRIx64 "\n",
485 cl_ntoh64(osm_node_get_node_guid(p_node)));
487 if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH)
490 /* We are handling a switch node */
491 __osm_drop_mgr_check_node(sm, p_node);
/* Third pass: ports that were not discovered in the last sweep. */
494 p_next_port = (osm_port_t *) cl_qmap_head(p_port_guid_tbl);
495 while (p_next_port != (osm_port_t *) cl_qmap_end(p_port_guid_tbl)) {
496 p_port = p_next_port;
498 (osm_port_t *) cl_qmap_next(&p_next_port->map_item);
500 CL_ASSERT(cl_qmap_key(&p_port->map_item) ==
501 osm_port_get_guid(p_port));
503 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
504 "Checking port 0x%016" PRIx64 "\n",
505 cl_ntoh64(osm_port_get_guid(p_port)));
508 If the port is unreachable, remove it from the guid table.
510 if (p_port->discovery_count == 0)
511 __osm_drop_mgr_remove_port(sm, p_port);
514 CL_PLOCK_RELEASE(sm->p_lock);
515 OSM_LOG_EXIT(sm->p_log);