]> CyberLeo.Net >> Repos - FreeBSD/releng/9.2.git/blob - contrib/ofed/management/opensm/opensm/osm_drop_mgr.c
- Copy stable/9 to releng/9.2 as part of the 9.2-RELEASE cycle.
[FreeBSD/releng/9.2.git] / contrib / ofed / management / opensm / opensm / osm_drop_mgr.c
1 /*
2  * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
3  * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved.
4  * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5  * Copyright (c) 2008 Xsigo Systems Inc.  All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  *
35  */
36
37 /*
38  * Abstract:
39  *    Implementation of osm_drop_mgr_t.
40  * This object represents the Drop Manager object.
41  * This object is part of the opensm family of objects.
42  */
43
44 #if HAVE_CONFIG_H
45 #  include <config.h>
46 #endif                          /* HAVE_CONFIG_H */
47
48 #include <stdlib.h>
49 #include <string.h>
50 #include <iba/ib_types.h>
51 #include <complib/cl_qmap.h>
52 #include <complib/cl_passivelock.h>
53 #include <complib/cl_debug.h>
54 #include <complib/cl_ptr_vector.h>
55 #include <opensm/osm_sm.h>
56 #include <opensm/osm_router.h>
57 #include <opensm/osm_switch.h>
58 #include <opensm/osm_node.h>
59 #include <opensm/osm_helper.h>
60 #include <opensm/osm_mcm_info.h>
61 #include <opensm/osm_multicast.h>
62 #include <opensm/osm_remote_sm.h>
63 #include <opensm/osm_inform.h>
64 #include <opensm/osm_ucast_mgr.h>
65
66 /**********************************************************************
67  **********************************************************************/
68 static void
69 __osm_drop_mgr_remove_router(osm_sm_t * sm, IN const ib_net64_t portguid)
70 {
71         osm_router_t *p_rtr;
72         cl_qmap_t *p_rtr_guid_tbl;
73
74         p_rtr_guid_tbl = &sm->p_subn->rtr_guid_tbl;
75         p_rtr = (osm_router_t *) cl_qmap_remove(p_rtr_guid_tbl, portguid);
76         if (p_rtr != (osm_router_t *) cl_qmap_end(p_rtr_guid_tbl)) {
77                 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
78                         "Cleaned router for port guid 0x%016" PRIx64 "\n",
79                         cl_ntoh64(portguid));
80                 osm_router_delete(&p_rtr);
81         }
82 }
83
84 /**********************************************************************
85  **********************************************************************/
86 static void drop_mgr_clean_physp(osm_sm_t * sm, IN osm_physp_t * p_physp)
87 {
88         osm_physp_t *p_remote_physp;
89         osm_port_t *p_remote_port;
90
91         p_remote_physp = osm_physp_get_remote(p_physp);
92         if (p_remote_physp) {
93                 p_remote_port = osm_get_port_by_guid(sm->p_subn,
94                                                      p_remote_physp->port_guid);
95
96                 if (p_remote_port) {
97                         /* Let's check if this is a case of link that is lost (both ports
98                            weren't recognized), or a "hiccup" in the subnet - in which case
99                            the remote port was recognized, and its state is ACTIVE.
100                            If this is just a "hiccup" - force a heavy sweep in the next sweep.
101                            We don't want to lose that part of the subnet. */
102                         if (p_remote_port->discovery_count &&
103                             osm_physp_get_port_state(p_remote_physp) ==
104                             IB_LINK_ACTIVE) {
105                                 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
106                                         "Forcing new heavy sweep. Remote "
107                                         "port 0x%016" PRIx64 " port num: %u "
108                                         "was recognized in ACTIVE state\n",
109                                         cl_ntoh64(p_remote_physp->port_guid),
110                                         p_remote_physp->port_num);
111                                 sm->p_subn->force_heavy_sweep = TRUE;
112                         }
113
114                         /* If the remote node is ca or router - need to remove the remote port,
115                            since it is no longer reachable. This can be done if we reset the
116                            discovery count of the remote port. */
117                         if (!p_remote_physp->p_node->sw) {
118                                 p_remote_port->discovery_count = 0;
119                                 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
120                                         "Resetting discovery count of node: "
121                                         "0x%016" PRIx64 " port num:%u\n",
122                                         cl_ntoh64(osm_node_get_node_guid
123                                                   (p_remote_physp->p_node)),
124                                         p_remote_physp->port_num);
125                         }
126                 }
127
128                 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
129                         "Unlinking local node 0x%016" PRIx64 ", port %u"
130                         "\n\t\t\t\tand remote node 0x%016" PRIx64
131                         ", port %u\n",
132                         cl_ntoh64(osm_node_get_node_guid(p_physp->p_node)),
133                         p_physp->port_num,
134                         cl_ntoh64(osm_node_get_node_guid
135                                   (p_remote_physp->p_node)),
136                         p_remote_physp->port_num);
137
138                 if (sm->ucast_mgr.cache_valid)
139                         osm_ucast_cache_add_link(&sm->ucast_mgr,
140                                                  p_physp, p_remote_physp);
141
142                 osm_physp_unlink(p_physp, p_remote_physp);
143
144         }
145
146         OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
147                 "Clearing node 0x%016" PRIx64 " physical port number %u\n",
148                 cl_ntoh64(osm_node_get_node_guid(p_physp->p_node)),
149                 p_physp->port_num);
150
151         osm_physp_destroy(p_physp);
152 }
153
154 /**********************************************************************
155  **********************************************************************/
156 static void __osm_drop_mgr_remove_port(osm_sm_t * sm, IN osm_port_t * p_port)
157 {
158         ib_net64_t port_guid;
159         osm_port_t *p_port_check;
160         cl_qmap_t *p_sm_guid_tbl;
161         osm_mcm_info_t *p_mcm;
162         osm_mgrp_t *p_mgrp;
163         cl_ptr_vector_t *p_port_lid_tbl;
164         uint16_t min_lid_ho;
165         uint16_t max_lid_ho;
166         uint16_t lid_ho;
167         osm_node_t *p_node;
168         osm_remote_sm_t *p_sm;
169         ib_gid_t port_gid;
170         ib_mad_notice_attr_t notice;
171         ib_api_status_t status;
172
173         OSM_LOG_ENTER(sm->p_log);
174
175         port_guid = osm_port_get_guid(p_port);
176         OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
177                 "Unreachable port 0x%016" PRIx64 "\n", cl_ntoh64(port_guid));
178
179         p_port_check =
180             (osm_port_t *) cl_qmap_remove(&sm->p_subn->port_guid_tbl,
181                                           port_guid);
182         if (p_port_check != p_port) {
183                 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0101: "
184                         "Port 0x%016" PRIx64 " not in guid table\n",
185                         cl_ntoh64(port_guid));
186                 goto Exit;
187         }
188
189         p_sm_guid_tbl = &sm->p_subn->sm_guid_tbl;
190         p_sm = (osm_remote_sm_t *) cl_qmap_remove(p_sm_guid_tbl, port_guid);
191         if (p_sm != (osm_remote_sm_t *) cl_qmap_end(p_sm_guid_tbl)) {
192                 /* need to remove this item */
193                 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
194                         "Cleaned SM for port guid 0x%016" PRIx64 "\n",
195                         cl_ntoh64(port_guid));
196
197                 free(p_sm);
198         }
199
200         __osm_drop_mgr_remove_router(sm, port_guid);
201
202         osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho);
203
204         OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
205                 "Clearing abandoned LID range [%u,%u]\n",
206                 min_lid_ho, max_lid_ho);
207
208         p_port_lid_tbl = &sm->p_subn->port_lid_tbl;
209         for (lid_ho = min_lid_ho; lid_ho <= max_lid_ho; lid_ho++)
210                 cl_ptr_vector_set(p_port_lid_tbl, lid_ho, NULL);
211
212         drop_mgr_clean_physp(sm, p_port->p_physp);
213
214         p_mcm = (osm_mcm_info_t *) cl_qlist_remove_head(&p_port->mcm_list);
215         while (p_mcm != (osm_mcm_info_t *) cl_qlist_end(&p_port->mcm_list)) {
216                 p_mgrp = osm_get_mgrp_by_mlid(sm->p_subn, p_mcm->mlid);
217                 if (p_mgrp) {
218                         osm_mgrp_delete_port(sm->p_subn, sm->p_log,
219                                              p_mgrp, p_port->guid);
220                         osm_mcm_info_delete((osm_mcm_info_t *) p_mcm);
221                 }
222                 p_mcm =
223                     (osm_mcm_info_t *) cl_qlist_remove_head(&p_port->mcm_list);
224         }
225
226         /* initialize the p_node - may need to get node_desc later */
227         p_node = p_port->p_node;
228
229         osm_port_delete(&p_port);
230
231         /* issue a notice - trap 65 */
232
233         /* details of the notice */
234         notice.generic_type = 0x83;     /* is generic subn mgt type */
235         ib_notice_set_prod_type_ho(&notice, 4); /* A class manager generator */
236         /* endport ceases to be reachable */
237         notice.g_or_v.generic.trap_num = CL_HTON16(65);
238         /* The sm_base_lid is saved in network order already. */
239         notice.issuer_lid = sm->p_subn->sm_base_lid;
240         /* following C14-72.1.2 and table 119 p725 */
241         /* we need to provide the GID */
242         port_gid.unicast.prefix = sm->p_subn->opt.subnet_prefix;
243         port_gid.unicast.interface_id = port_guid;
244         memcpy(&(notice.data_details.ntc_64_67.gid),
245                &(port_gid), sizeof(ib_gid_t));
246
247         /* According to page 653 - the issuer gid in this case of trap
248            is the SM gid, since the SM is the initiator of this trap. */
249         notice.issuer_gid.unicast.prefix = sm->p_subn->opt.subnet_prefix;
250         notice.issuer_gid.unicast.interface_id = sm->p_subn->sm_port_guid;
251
252         status = osm_report_notice(sm->p_log, sm->p_subn, &notice);
253         if (status != IB_SUCCESS) {
254                 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0103: "
255                         "Error sending trap reports (%s)\n",
256                         ib_get_err_str(status));
257                 goto Exit;
258         }
259
260         OSM_LOG(sm->p_log, OSM_LOG_INFO,
261                 "Removed port with GUID:0x%016" PRIx64
262                 " LID range [%u, %u] of node:%s\n",
263                 cl_ntoh64(port_gid.unicast.interface_id),
264                 min_lid_ho, max_lid_ho,
265                 p_node ? p_node->print_desc : "UNKNOWN");
266
267 Exit:
268         OSM_LOG_EXIT(sm->p_log);
269 }
270
271 /**********************************************************************
272  **********************************************************************/
273 static void __osm_drop_mgr_remove_switch(osm_sm_t * sm, IN osm_node_t * p_node)
274 {
275         osm_switch_t *p_sw;
276         cl_qmap_t *p_sw_guid_tbl;
277         ib_net64_t node_guid;
278
279         OSM_LOG_ENTER(sm->p_log);
280
281         node_guid = osm_node_get_node_guid(p_node);
282         p_sw_guid_tbl = &sm->p_subn->sw_guid_tbl;
283
284         p_sw = (osm_switch_t *) cl_qmap_remove(p_sw_guid_tbl, node_guid);
285         if (p_sw == (osm_switch_t *) cl_qmap_end(p_sw_guid_tbl)) {
286                 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0102: "
287                         "Node 0x%016" PRIx64 " not in switch table\n",
288                         cl_ntoh64(osm_node_get_node_guid(p_node)));
289         } else {
290                 p_node->sw = NULL;
291                 osm_switch_delete(&p_sw);
292         }
293
294         OSM_LOG_EXIT(sm->p_log);
295 }
296
297 /**********************************************************************
298  **********************************************************************/
299 static boolean_t
300 __osm_drop_mgr_process_node(osm_sm_t * sm, IN osm_node_t * p_node)
301 {
302         osm_physp_t *p_physp;
303         osm_port_t *p_port;
304         osm_node_t *p_node_check;
305         uint32_t port_num;
306         uint32_t max_ports;
307         ib_net64_t port_guid;
308         boolean_t return_val = FALSE;
309
310         OSM_LOG_ENTER(sm->p_log);
311
312         OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
313                 "Unreachable node 0x%016" PRIx64 "\n",
314                 cl_ntoh64(osm_node_get_node_guid(p_node)));
315
316         if (sm->ucast_mgr.cache_valid)
317                 osm_ucast_cache_add_node(&sm->ucast_mgr, p_node);
318
319         /*
320            Delete all the logical and physical port objects
321            associated with this node.
322          */
323         max_ports = osm_node_get_num_physp(p_node);
324         for (port_num = 0; port_num < max_ports; port_num++) {
325                 p_physp = osm_node_get_physp_ptr(p_node, port_num);
326                 if (p_physp) {
327                         port_guid = osm_physp_get_port_guid(p_physp);
328
329                         p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
330
331                         if (p_port)
332                                 __osm_drop_mgr_remove_port(sm, p_port);
333                         else
334                                 drop_mgr_clean_physp(sm, p_physp);
335                 }
336         }
337
338         return_val = TRUE;
339
340         if (p_node->sw)
341                 __osm_drop_mgr_remove_switch(sm, p_node);
342
343         p_node_check =
344             (osm_node_t *) cl_qmap_remove(&sm->p_subn->node_guid_tbl,
345                                           osm_node_get_node_guid(p_node));
346         if (p_node_check != p_node) {
347                 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0105: "
348                         "Node 0x%016" PRIx64 " not in guid table\n",
349                         cl_ntoh64(osm_node_get_node_guid(p_node)));
350         }
351
352         /* free memory allocated to node */
353         osm_node_delete(&p_node);
354
355         OSM_LOG_EXIT(sm->p_log);
356         return (return_val);
357 }
358
359 /**********************************************************************
360  **********************************************************************/
361 static void __osm_drop_mgr_check_node(osm_sm_t * sm, IN osm_node_t * p_node)
362 {
363         ib_net64_t node_guid;
364         osm_physp_t *p_physp;
365         osm_port_t *p_port;
366         ib_net64_t port_guid;
367
368         OSM_LOG_ENTER(sm->p_log);
369
370         node_guid = osm_node_get_node_guid(p_node);
371
372         if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH) {
373                 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0107: "
374                         "Node 0x%016" PRIx64 " is not a switch node\n",
375                         cl_ntoh64(node_guid));
376                 goto Exit;
377         }
378
379         /* Make sure we have a switch object for this node */
380         if (!p_node->sw) {
381                 /* We do not have switch info for this node */
382                 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
383                         "Node 0x%016" PRIx64 " no switch in table\n",
384                         cl_ntoh64(node_guid));
385
386                 __osm_drop_mgr_process_node(sm, p_node);
387                 goto Exit;
388         }
389
390         /* Make sure we have a port object for port zero */
391         p_physp = osm_node_get_physp_ptr(p_node, 0);
392         if (!p_physp) {
393                 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
394                         "Node 0x%016" PRIx64 " no valid physical port 0\n",
395                         cl_ntoh64(node_guid));
396
397                 __osm_drop_mgr_process_node(sm, p_node);
398                 goto Exit;
399         }
400
401         port_guid = osm_physp_get_port_guid(p_physp);
402
403         p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
404
405         if (!p_port) {
406                 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
407                         "Node 0x%016" PRIx64 " has no port object\n",
408                         cl_ntoh64(node_guid));
409
410                 __osm_drop_mgr_process_node(sm, p_node);
411                 goto Exit;
412         }
413
414         if (p_port->discovery_count == 0) {
415                 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
416                         "Node 0x%016" PRIx64 " port has discovery count zero\n",
417                         cl_ntoh64(node_guid));
418
419                 __osm_drop_mgr_process_node(sm, p_node);
420                 goto Exit;
421         }
422
423 Exit:
424         OSM_LOG_EXIT(sm->p_log);
425         return;
426 }
427
428 /**********************************************************************
429  **********************************************************************/
430 void osm_drop_mgr_process(osm_sm_t * sm)
431 {
432         cl_qmap_t *p_node_guid_tbl;
433         cl_qmap_t *p_port_guid_tbl;
434         osm_port_t *p_port;
435         osm_port_t *p_next_port;
436         osm_node_t *p_node;
437         osm_node_t *p_next_node;
438
439         CL_ASSERT(sm);
440
441         OSM_LOG_ENTER(sm->p_log);
442
443         p_node_guid_tbl = &sm->p_subn->node_guid_tbl;
444         p_port_guid_tbl = &sm->p_subn->port_guid_tbl;
445
446         CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
447
448         p_next_node = (osm_node_t *) cl_qmap_head(p_node_guid_tbl);
449         while (p_next_node != (osm_node_t *) cl_qmap_end(p_node_guid_tbl)) {
450                 p_node = p_next_node;
451                 p_next_node =
452                     (osm_node_t *) cl_qmap_next(&p_next_node->map_item);
453
454                 CL_ASSERT(cl_qmap_key(&p_node->map_item) ==
455                           osm_node_get_node_guid(p_node));
456
457                 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
458                         "Checking node 0x%016" PRIx64 "\n",
459                         cl_ntoh64(osm_node_get_node_guid(p_node)));
460
461                 /*
462                    Check if this node was discovered during the last sweep.
463                    If not, it is unreachable in the current subnet, and
464                    should therefore be removed from the subnet object.
465                  */
466                 if (p_node->discovery_count == 0)
467                         __osm_drop_mgr_process_node(sm, p_node);
468         }
469
470         /*
471            Go over all the nodes. If the node is a switch - make sure
472            there is also a switch record for it, and a portInfo record for
473            port zero of of the node.
474            If not - this means that there was some error in getting the data
475            of this node. Drop the node.
476          */
477         p_next_node = (osm_node_t *) cl_qmap_head(p_node_guid_tbl);
478         while (p_next_node != (osm_node_t *) cl_qmap_end(p_node_guid_tbl)) {
479                 p_node = p_next_node;
480                 p_next_node =
481                     (osm_node_t *) cl_qmap_next(&p_next_node->map_item);
482
483                 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
484                         "Checking full discovery of node 0x%016" PRIx64 "\n",
485                         cl_ntoh64(osm_node_get_node_guid(p_node)));
486
487                 if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH)
488                         continue;
489
490                 /* We are handling a switch node */
491                 __osm_drop_mgr_check_node(sm, p_node);
492         }
493
494         p_next_port = (osm_port_t *) cl_qmap_head(p_port_guid_tbl);
495         while (p_next_port != (osm_port_t *) cl_qmap_end(p_port_guid_tbl)) {
496                 p_port = p_next_port;
497                 p_next_port =
498                     (osm_port_t *) cl_qmap_next(&p_next_port->map_item);
499
500                 CL_ASSERT(cl_qmap_key(&p_port->map_item) ==
501                           osm_port_get_guid(p_port));
502
503                 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
504                         "Checking port 0x%016" PRIx64 "\n",
505                         cl_ntoh64(osm_port_get_guid(p_port)));
506
507                 /*
508                    If the port is unreachable, remove it from the guid table.
509                  */
510                 if (p_port->discovery_count == 0)
511                         __osm_drop_mgr_remove_port(sm, p_port);
512         }
513
514         CL_PLOCK_RELEASE(sm->p_lock);
515         OSM_LOG_EXIT(sm->p_log);
516 }