]> CyberLeo.Net >> Repos - FreeBSD/releng/9.2.git/blob - contrib/ofed/management/opensm/opensm/osm_node_info_rcv.c
- Copy stable/9 to releng/9.2 as part of the 9.2-RELEASE cycle.
[FreeBSD/releng/9.2.git] / contrib / ofed / management / opensm / opensm / osm_node_info_rcv.c
1 /*
2  * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
3  * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved.
4  * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  *
34  */
35
36 /*
37  * Abstract:
38  *    Implementation of osm_ni_rcv_t.
39  * This object represents the NodeInfo Receiver object.
40  * This object is part of the opensm family of objects.
41  */
42
43 #if HAVE_CONFIG_H
44 #  include <config.h>
45 #endif                          /* HAVE_CONFIG_H */
46
47 #include <stdlib.h>
48 #include <string.h>
49 #include <iba/ib_types.h>
50 #include <complib/cl_qmap.h>
51 #include <complib/cl_passivelock.h>
52 #include <complib/cl_debug.h>
53 #include <opensm/osm_madw.h>
54 #include <opensm/osm_log.h>
55 #include <opensm/osm_node.h>
56 #include <opensm/osm_subnet.h>
57 #include <opensm/osm_router.h>
58 #include <opensm/osm_mad_pool.h>
59 #include <opensm/osm_helper.h>
60 #include <opensm/osm_msgdef.h>
61 #include <opensm/osm_opensm.h>
62 #include <opensm/osm_ucast_mgr.h>
63
64 static void
65 report_duplicated_guid(IN osm_sm_t * sm,
66                        osm_physp_t * p_physp,
67                        osm_node_t * p_neighbor_node, const uint8_t port_num)
68 {
69         osm_physp_t *p_old, *p_new;
70         osm_dr_path_t path;
71
72         p_old = p_physp->p_remote_physp;
73         p_new = osm_node_get_physp_ptr(p_neighbor_node, port_num);
74
75         OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D01: "
76                 "Found duplicated node.\n"
77                 "Node 0x%" PRIx64 " port %u is reachable from remote node "
78                 "0x%" PRIx64 " port %u and remote node 0x%" PRIx64 " port %u.\n"
79                 "Paths are:\n",
80                 cl_ntoh64(p_physp->p_node->node_info.node_guid),
81                 p_physp->port_num,
82                 cl_ntoh64(p_old->p_node->node_info.node_guid), p_old->port_num,
83                 cl_ntoh64(p_new->p_node->node_info.node_guid), p_new->port_num);
84
85         osm_dump_dr_path(sm->p_log, osm_physp_get_dr_path_ptr(p_physp),
86                          OSM_LOG_ERROR);
87
88         path = *osm_physp_get_dr_path_ptr(p_new);
89         osm_dr_path_extend(&path, port_num);
90         osm_dump_dr_path(sm->p_log, &path, OSM_LOG_ERROR);
91
92         osm_log(sm->p_log, OSM_LOG_SYS,
93                 "FATAL: duplicated guids or 12x lane reversal\n");
94 }
95
96 static void requery_dup_node_info(IN osm_sm_t * sm,
97                                   osm_physp_t * p_physp, unsigned count)
98 {
99         osm_madw_context_t context;
100         osm_dr_path_t path;
101         cl_status_t status;
102
103         path = *osm_physp_get_dr_path_ptr(p_physp->p_remote_physp);
104         osm_dr_path_extend(&path, p_physp->p_remote_physp->port_num);
105
106         context.ni_context.node_guid =
107             p_physp->p_remote_physp->p_node->node_info.port_guid;
108         context.ni_context.port_num = p_physp->p_remote_physp->port_num;
109         context.ni_context.dup_node_guid = p_physp->p_node->node_info.node_guid;
110         context.ni_context.dup_port_num = p_physp->port_num;
111         context.ni_context.dup_count = count;
112
113         status = osm_req_get(sm, &path, IB_MAD_ATTR_NODE_INFO,
114                              0, CL_DISP_MSGID_NONE, &context);
115
116         if (status != IB_SUCCESS)
117                 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D02: "
118                         "Failure initiating NodeInfo request (%s)\n",
119                         ib_get_err_str(status));
120 }
121
122 /**********************************************************************
123  The plock must be held before calling this function.
124 **********************************************************************/
125 static void
126 __osm_ni_rcv_set_links(IN osm_sm_t * sm,
127                        osm_node_t * p_node,
128                        const uint8_t port_num,
129                        const osm_ni_context_t * const p_ni_context)
130 {
131         osm_node_t *p_neighbor_node;
132         osm_physp_t *p_physp;
133
134         OSM_LOG_ENTER(sm->p_log);
135
136         /*
137            A special case exists in which the node we're trying to
138            link is our own node.  In this case, the guid value in
139            the ni_context will be zero.
140          */
141         if (p_ni_context->node_guid == 0) {
142                 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
143                         "Nothing to link for our own node 0x%" PRIx64 "\n",
144                         cl_ntoh64(osm_node_get_node_guid(p_node)));
145                 goto _exit;
146         }
147
148         p_neighbor_node = osm_get_node_by_guid(sm->p_subn,
149                                                p_ni_context->node_guid);
150         if (!p_neighbor_node) {
151                 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D10: "
152                         "Unexpected removal of neighbor node "
153                         "0x%" PRIx64 "\n", cl_ntoh64(p_ni_context->node_guid));
154                 goto _exit;
155         }
156
157         /*
158            We have seen this neighbor node before, but we might
159            not have seen this port on the neighbor node before.
160            We should not set links to an uninitialized port on the
161            neighbor, so check validity up front.  If it's not
162            valid, do nothing, since we'll see this link again
163            when we probe the neighbor.
164          */
165         if (!osm_node_link_has_valid_ports(p_node, port_num,
166                                            p_neighbor_node,
167                                            p_ni_context->port_num))
168                 goto _exit;
169
170         if (osm_node_link_exists(p_node, port_num,
171                                  p_neighbor_node, p_ni_context->port_num)) {
172                 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Link already exists\n");
173                 goto _exit;
174         }
175
176         if (osm_node_has_any_link(p_node, port_num) &&
177             sm->p_subn->force_heavy_sweep == FALSE &&
178             (!p_ni_context->dup_count ||
179              (p_ni_context->dup_node_guid == osm_node_get_node_guid(p_node) &&
180               p_ni_context->dup_port_num == port_num))) {
181                 /*
182                    Uh oh...
183                    This could be reconnected ports, but also duplicated GUID
184                    (2 nodes have the same guid) or a 12x link with lane reversal
185                    that is not configured correctly.
186                    We will try to recover by querying NodeInfo again.
187                    In order to catch even fast port moving to new location(s) and
188                    back we will count up to 5.
189                    Some crazy reconnections (newly created switch loop right before
190                    targeted CA) will not be catched this way. So in worst case -
191                    report GUID duplication and request new discovery.
192                    When switch node is targeted NodeInfo querying will be done in
193                    opposite order, this is much stronger check, unfortunately it is
194                    impossible with CAs.
195                  */
196                 p_physp = osm_node_get_physp_ptr(p_node, port_num);
197                 if (p_ni_context->dup_count > 5) {
198                         report_duplicated_guid(sm, p_physp,
199                                                p_neighbor_node,
200                                                p_ni_context->port_num);
201                         sm->p_subn->force_heavy_sweep = TRUE;
202                 } else if (p_node->sw)
203                         requery_dup_node_info(sm, p_physp->p_remote_physp,
204                                               p_ni_context->dup_count + 1);
205                 else
206                         requery_dup_node_info(sm, p_physp,
207                                               p_ni_context->dup_count + 1);
208         }
209
210         /*
211            When there are only two nodes with exact same guids (connected back
212            to back) - the previous check for duplicated guid will not catch
213            them. But the link will be from the port to itself...
214            Enhanced Port 0 is an exception to this
215          */
216         if ((osm_node_get_node_guid(p_node) == p_ni_context->node_guid) &&
217             (port_num == p_ni_context->port_num) &&
218             port_num != 0 && cl_qmap_count(&sm->p_subn->sw_guid_tbl) == 0) {
219                 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
220                         "Duplicate GUID found by link from a port to itself:"
221                         "node 0x%" PRIx64 ", port number %u\n",
222                         cl_ntoh64(osm_node_get_node_guid(p_node)), port_num);
223                 p_physp = osm_node_get_physp_ptr(p_node, port_num);
224                 osm_dump_dr_path(sm->p_log,
225                                  osm_physp_get_dr_path_ptr(p_physp),
226                                  OSM_LOG_VERBOSE);
227
228                 if (sm->p_subn->opt.exit_on_fatal == TRUE) {
229                         osm_log(sm->p_log, OSM_LOG_SYS,
230                                 "Errors on subnet. Duplicate GUID found "
231                                 "by link from a port to itself. "
232                                 "See verbose opensm.log for more details\n");
233                         exit(1);
234                 }
235         }
236
237         OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
238                 "Creating new link between:\n\t\t\t\tnode 0x%" PRIx64
239                 ", port number %u and\n\t\t\t\tnode 0x%" PRIx64
240                 ", port number %u\n",
241                 cl_ntoh64(osm_node_get_node_guid(p_node)), port_num,
242                 cl_ntoh64(p_ni_context->node_guid), p_ni_context->port_num);
243
244         if (sm->ucast_mgr.cache_valid)
245                 osm_ucast_cache_check_new_link(&sm->ucast_mgr,
246                                                p_node, port_num,
247                                                p_neighbor_node,
248                                                p_ni_context->port_num);
249
250         osm_node_link(p_node, port_num, p_neighbor_node,
251                       p_ni_context->port_num);
252
253 _exit:
254         OSM_LOG_EXIT(sm->p_log);
255 }
256
257 /**********************************************************************
258  The plock must be held before calling this function.
259 **********************************************************************/
260 static void
261 __osm_ni_rcv_process_new_node(IN osm_sm_t * sm,
262                               IN osm_node_t * const p_node,
263                               IN const osm_madw_t * const p_madw)
264 {
265         ib_api_status_t status = IB_SUCCESS;
266         osm_madw_context_t context;
267         osm_physp_t *p_physp;
268         ib_node_info_t *p_ni;
269         ib_smp_t *p_smp;
270         uint8_t port_num;
271
272         OSM_LOG_ENTER(sm->p_log);
273
274         p_smp = osm_madw_get_smp_ptr(p_madw);
275         p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp);
276         port_num = ib_node_info_get_local_port_num(p_ni);
277
278         /*
279            Request PortInfo & NodeDescription attributes for the port
280            that responded to the NodeInfo attribute.
281            Because this is a channel adapter or router, we are
282            not allowed to request PortInfo for the other ports.
283            Set the context union properly, so the recipient
284            knows which node & port are relevant.
285          */
286         p_physp = osm_node_get_physp_ptr(p_node, port_num);
287
288         context.pi_context.node_guid = p_ni->node_guid;
289         context.pi_context.port_guid = p_ni->port_guid;
290         context.pi_context.set_method = FALSE;
291         context.pi_context.light_sweep = FALSE;
292         context.pi_context.active_transition = FALSE;
293
294         status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp),
295                              IB_MAD_ATTR_PORT_INFO,
296                              cl_hton32(port_num), CL_DISP_MSGID_NONE, &context);
297         if (status != IB_SUCCESS)
298                 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D02: "
299                         "Failure initiating PortInfo request (%s)\n",
300                         ib_get_err_str(status));
301
302         OSM_LOG_EXIT(sm->p_log);
303 }
304
305 /**********************************************************************
306  The plock must be held before calling this function.
307 **********************************************************************/
308 void
309 osm_req_get_node_desc(IN osm_sm_t * sm,
310                         osm_physp_t *p_physp)
311 {
312         ib_api_status_t status = IB_SUCCESS;
313         osm_madw_context_t context;
314
315         OSM_LOG_ENTER(sm->p_log);
316
317         context.nd_context.node_guid =
318                 osm_node_get_node_guid(osm_physp_get_node_ptr(p_physp));
319
320         status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp),
321                              IB_MAD_ATTR_NODE_DESC,
322                              0, CL_DISP_MSGID_NONE, &context);
323         if (status != IB_SUCCESS)
324                 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D03: "
325                         "Failure initiating NodeDescription request (%s)\n",
326                         ib_get_err_str(status));
327
328         OSM_LOG_EXIT(sm->p_log);
329 }
330
331 /**********************************************************************
332  The plock must be held before calling this function.
333 **********************************************************************/
334 static void
335 __osm_ni_rcv_get_node_desc(IN osm_sm_t * sm,
336                            IN osm_node_t * const p_node,
337                            IN const osm_madw_t * const p_madw)
338 {
339         ib_node_info_t *p_ni;
340         ib_smp_t *p_smp;
341         uint8_t port_num;
342         osm_physp_t *p_physp = NULL;
343
344         OSM_LOG_ENTER(sm->p_log);
345
346         p_smp = osm_madw_get_smp_ptr(p_madw);
347         p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp);
348         port_num = ib_node_info_get_local_port_num(p_ni);
349
350         /*
351            Request PortInfo & NodeDescription attributes for the port
352            that responded to the NodeInfo attribute.
353            Because this is a channel adapter or router, we are
354            not allowed to request PortInfo for the other ports.
355            Set the context union properly, so the recipient
356            knows which node & port are relevant.
357          */
358         p_physp = osm_node_get_physp_ptr(p_node, port_num);
359
360         osm_req_get_node_desc(sm, p_physp);
361
362         OSM_LOG_EXIT(sm->p_log);
363 }
364
365 /**********************************************************************
366  The plock must be held before calling this function.
367 **********************************************************************/
368 static void
369 __osm_ni_rcv_process_new_ca_or_router(IN osm_sm_t * sm,
370                                       IN osm_node_t * const p_node,
371                                       IN const osm_madw_t * const p_madw)
372 {
373         OSM_LOG_ENTER(sm->p_log);
374
375         __osm_ni_rcv_process_new_node(sm, p_node, p_madw);
376
377         /*
378            A node guid of 0 is the corner case that indicates
379            we discovered our own node.  Initialize the subnet
380            object with the SM's own port guid.
381          */
382         if (osm_madw_get_ni_context_ptr(p_madw)->node_guid == 0)
383                 sm->p_subn->sm_port_guid = p_node->node_info.port_guid;
384
385         OSM_LOG_EXIT(sm->p_log);
386 }
387
388 /**********************************************************************
389  The plock must be held before calling this function.
390 **********************************************************************/
391 static void
392 __osm_ni_rcv_process_existing_ca_or_router(IN osm_sm_t * sm,
393                                            IN osm_node_t * const p_node,
394                                            IN const osm_madw_t * const p_madw)
395 {
396         ib_node_info_t *p_ni;
397         ib_smp_t *p_smp;
398         osm_port_t *p_port;
399         osm_port_t *p_port_check;
400         osm_madw_context_t context;
401         uint8_t port_num;
402         osm_physp_t *p_physp;
403         ib_api_status_t status;
404         osm_dr_path_t *p_dr_path;
405         osm_bind_handle_t h_bind;
406
407         OSM_LOG_ENTER(sm->p_log);
408
409         p_smp = osm_madw_get_smp_ptr(p_madw);
410         p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp);
411         port_num = ib_node_info_get_local_port_num(p_ni);
412         h_bind = osm_madw_get_bind_handle(p_madw);
413
414         /*
415            Determine if we have encountered this node through a
416            previously undiscovered port.  If so, build the new
417            port object.
418          */
419         p_port = osm_get_port_by_guid(sm->p_subn, p_ni->port_guid);
420         if (!p_port) {
421                 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
422                         "Creating new port object with GUID 0x%" PRIx64 "\n",
423                         cl_ntoh64(p_ni->port_guid));
424
425                 osm_node_init_physp(p_node, p_madw);
426
427                 p_port = osm_port_new(p_ni, p_node);
428                 if (p_port == NULL) {
429                         OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D04: "
430                                 "Unable to create new port object\n");
431                         goto Exit;
432                 }
433
434                 /*
435                    Add the new port object to the database.
436                  */
437                 p_port_check =
438                     (osm_port_t *) cl_qmap_insert(&sm->p_subn->port_guid_tbl,
439                                                   p_ni->port_guid,
440                                                   &p_port->map_item);
441                 if (p_port_check != p_port) {
442                         /*
443                            We should never be here!
444                            Somehow, this port GUID already exists in the table.
445                          */
446                         OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D12: "
447                                 "Port 0x%" PRIx64 " already in the database!\n",
448                                 cl_ntoh64(p_ni->port_guid));
449
450                         osm_port_delete(&p_port);
451                         goto Exit;
452                 }
453
454                 /* If we are a master, then this means the port is new on the subnet.
455                    Mark it as new - need to send trap 64 on these ports.
456                    The condition that we are master is true, since if we are in discovering
457                    state (meaning we woke up from standby or we are just initializing),
458                    then these ports may be new to us, but are not new on the subnet.
459                    If we are master, then the subnet as we know it is the updated one,
460                    and any new ports we encounter should cause trap 64. C14-72.1.1 */
461                 if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER)
462                         p_port->is_new = 1;
463
464                 p_physp = osm_node_get_physp_ptr(p_node, port_num);
465         } else {
466                 p_physp = osm_node_get_physp_ptr(p_node, port_num);
467                 /*
468                    Update the DR Path to the port,
469                    in case the old one is no longer available.
470                  */
471                 p_dr_path = osm_physp_get_dr_path_ptr(p_physp);
472
473                 osm_dr_path_init(p_dr_path, h_bind, p_smp->hop_count,
474                                  p_smp->initial_path);
475         }
476
477         context.pi_context.node_guid = p_ni->node_guid;
478         context.pi_context.port_guid = p_ni->port_guid;
479         context.pi_context.set_method = FALSE;
480         context.pi_context.light_sweep = FALSE;
481
482         status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp),
483                              IB_MAD_ATTR_PORT_INFO,
484                              cl_hton32(port_num), CL_DISP_MSGID_NONE, &context);
485
486         if (status != IB_SUCCESS)
487                 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D13: "
488                         "Failure initiating PortInfo request (%s)\n",
489                         ib_get_err_str(status));
490
491 Exit:
492         OSM_LOG_EXIT(sm->p_log);
493 }
494
495 /**********************************************************************
496  **********************************************************************/
497 static void
498 __osm_ni_rcv_process_switch(IN osm_sm_t * sm,
499                             IN osm_node_t * const p_node,
500                             IN const osm_madw_t * const p_madw)
501 {
502         ib_api_status_t status = IB_SUCCESS;
503         osm_madw_context_t context;
504         osm_dr_path_t *path;
505         ib_smp_t *p_smp;
506
507         OSM_LOG_ENTER(sm->p_log);
508
509         p_smp = osm_madw_get_smp_ptr(p_madw);
510
511         /* update DR path of already initialized switch port 0 */
512         path = osm_physp_get_dr_path_ptr(osm_node_get_physp_ptr(p_node, 0));
513         osm_dr_path_init(path, osm_madw_get_bind_handle(p_madw),
514                          p_smp->hop_count, p_smp->initial_path);
515
516         context.si_context.node_guid = osm_node_get_node_guid(p_node);
517         context.si_context.set_method = FALSE;
518         context.si_context.light_sweep = FALSE;
519
520         /* Request a SwitchInfo attribute */
521         status = osm_req_get(sm, path, IB_MAD_ATTR_SWITCH_INFO,
522                              0, CL_DISP_MSGID_NONE, &context);
523         if (status != IB_SUCCESS)
524                 /* continue despite error */
525                 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D06: "
526                         "Failure initiating SwitchInfo request (%s)\n",
527                         ib_get_err_str(status));
528
529         OSM_LOG_EXIT(sm->p_log);
530 }
531
532 /**********************************************************************
533  The plock must be held before calling this function.
534 **********************************************************************/
535 static void
536 __osm_ni_rcv_process_existing_switch(IN osm_sm_t * sm,
537                                      IN osm_node_t * const p_node,
538                                      IN const osm_madw_t * const p_madw)
539 {
540         OSM_LOG_ENTER(sm->p_log);
541
542         /*
543            If this switch has already been probed during this sweep,
544            then don't bother reprobing it.
545            There is one exception - if the node has been visited, but
546            for some reason we don't have the switch object (this can happen
547            if the SwitchInfo mad didn't reach the SM) then we want
548            to retry to probe the switch.
549          */
550         if (p_node->discovery_count == 1)
551                 __osm_ni_rcv_process_switch(sm, p_node, p_madw);
552         else if (!p_node->sw || p_node->sw->discovery_count == 0) {
553                 /* we don't have the SwitchInfo - retry to get it */
554                 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
555                         "Retry to get SwitchInfo on node GUID:0x%"
556                         PRIx64 "\n", cl_ntoh64(osm_node_get_node_guid(p_node)));
557                 __osm_ni_rcv_process_switch(sm, p_node, p_madw);
558         }
559
560         OSM_LOG_EXIT(sm->p_log);
561 }
562
563 /**********************************************************************
564  The plock must be held before calling this function.
565 **********************************************************************/
566 static void
567 __osm_ni_rcv_process_new_switch(IN osm_sm_t * sm,
568                                 IN osm_node_t * const p_node,
569                                 IN const osm_madw_t * const p_madw)
570 {
571         OSM_LOG_ENTER(sm->p_log);
572
573         __osm_ni_rcv_process_switch(sm, p_node, p_madw);
574
575         /*
576            A node guid of 0 is the corner case that indicates
577            we discovered our own node.  Initialize the subnet
578            object with the SM's own port guid.
579          */
580         if (osm_madw_get_ni_context_ptr(p_madw)->node_guid == 0)
581                 sm->p_subn->sm_port_guid = p_node->node_info.port_guid;
582
583         OSM_LOG_EXIT(sm->p_log);
584 }
585
586 /**********************************************************************
587  The plock must NOT be held before calling this function.
588 **********************************************************************/
589 static void
590 __osm_ni_rcv_process_new(IN osm_sm_t * sm,
591                          IN const osm_madw_t * const p_madw)
592 {
593         osm_node_t *p_node;
594         osm_node_t *p_node_check;
595         osm_port_t *p_port;
596         osm_port_t *p_port_check;
597         osm_router_t *p_rtr = NULL;
598         osm_router_t *p_rtr_check;
599         cl_qmap_t *p_rtr_guid_tbl;
600         ib_node_info_t *p_ni;
601         ib_smp_t *p_smp;
602         osm_ni_context_t *p_ni_context;
603         uint8_t port_num;
604
605         OSM_LOG_ENTER(sm->p_log);
606
607         p_smp = osm_madw_get_smp_ptr(p_madw);
608         p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp);
609         p_ni_context = osm_madw_get_ni_context_ptr(p_madw);
610         port_num = ib_node_info_get_local_port_num(p_ni);
611
612         osm_dump_smp_dr_path(sm->p_log, p_smp, OSM_LOG_VERBOSE);
613
614         OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
615                 "Discovered new %s node,"
616                 "\n\t\t\t\tGUID 0x%" PRIx64 ", TID 0x%" PRIx64 "\n",
617                 ib_get_node_type_str(p_ni->node_type),
618                 cl_ntoh64(p_ni->node_guid), cl_ntoh64(p_smp->trans_id));
619
620         p_node = osm_node_new(p_madw);
621         if (p_node == NULL) {
622                 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D07: "
623                         "Unable to create new node object\n");
624                 goto Exit;
625         }
626
627         /*
628            Create a new port object to represent this node's physical
629            ports in the port table.
630          */
631         p_port = osm_port_new(p_ni, p_node);
632         if (p_port == NULL) {
633                 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D14: "
634                         "Unable to create new port object\n");
635                 osm_node_delete(&p_node);
636                 goto Exit;
637         }
638
639         /*
640            Add the new port object to the database.
641          */
642         p_port_check =
643             (osm_port_t *) cl_qmap_insert(&sm->p_subn->port_guid_tbl,
644                                           p_ni->port_guid, &p_port->map_item);
645         if (p_port_check != p_port) {
646                 /*
647                    We should never be here!
648                    Somehow, this port GUID already exists in the table.
649                  */
650                 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D15: "
651                         "Duplicate Port GUID 0x%" PRIx64
652                         "! Found by the two directed routes:\n",
653                         cl_ntoh64(p_ni->port_guid));
654                 osm_dump_dr_path(sm->p_log,
655                                  osm_physp_get_dr_path_ptr(p_port->p_physp),
656                                  OSM_LOG_ERROR);
657                 osm_dump_dr_path(sm->p_log,
658                                  osm_physp_get_dr_path_ptr(p_port_check->
659                                                            p_physp),
660                                  OSM_LOG_ERROR);
661                 osm_port_delete(&p_port);
662                 osm_node_delete(&p_node);
663                 goto Exit;
664         }
665
666         /* If we are a master, then this means the port is new on the subnet.
667            Mark it as new - need to send trap 64 on these ports.
668            The condition that we are master is true, since if we are in discovering
669            state (meaning we woke up from standby or we are just initializing),
670            then these ports may be new to us, but are not new on the subnet.
671            If we are master, then the subnet as we know it is the updated one,
672            and any new ports we encounter should cause trap 64. C14-72.1.1 */
673         if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER)
674                 p_port->is_new = 1;
675
676         /* If there were RouterInfo or other router attribute,
677            this would be elsewhere */
678         if (p_ni->node_type == IB_NODE_TYPE_ROUTER) {
679                 if ((p_rtr = osm_router_new(p_port)) == NULL)
680                         OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1A: "
681                                 "Unable to create new router object\n");
682                 else {
683                         p_rtr_guid_tbl = &sm->p_subn->rtr_guid_tbl;
684                         p_rtr_check =
685                             (osm_router_t *) cl_qmap_insert(p_rtr_guid_tbl,
686                                                             p_ni->port_guid,
687                                                             &p_rtr->map_item);
688                         if (p_rtr_check != p_rtr)
689                                 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1B: "
690                                         "Unable to add port GUID:0x%016" PRIx64
691                                         " to router table\n",
692                                         cl_ntoh64(p_ni->port_guid));
693                 }
694         }
695
696         p_node_check =
697             (osm_node_t *) cl_qmap_insert(&sm->p_subn->node_guid_tbl,
698                                           p_ni->node_guid, &p_node->map_item);
699         if (p_node_check != p_node) {
700                 /*
701                    This node must have been inserted by another thread.
702                    This is unexpected, but is not an error.
703                    We can simply clean-up, since the other thread will
704                    see this processing through to completion.
705                  */
706                 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
707                         "Discovery race detected at node 0x%" PRIx64 "\n",
708                         cl_ntoh64(p_ni->node_guid));
709                 osm_node_delete(&p_node);
710                 p_node = p_node_check;
711                 __osm_ni_rcv_set_links(sm, p_node, port_num, p_ni_context);
712                 goto Exit;
713         } else
714                 __osm_ni_rcv_set_links(sm, p_node, port_num, p_ni_context);
715
716         p_node->discovery_count++;
717         __osm_ni_rcv_get_node_desc(sm, p_node, p_madw);
718
719         switch (p_ni->node_type) {
720         case IB_NODE_TYPE_CA:
721         case IB_NODE_TYPE_ROUTER:
722                 __osm_ni_rcv_process_new_ca_or_router(sm, p_node, p_madw);
723                 break;
724         case IB_NODE_TYPE_SWITCH:
725                 __osm_ni_rcv_process_new_switch(sm, p_node, p_madw);
726                 break;
727         default:
728                 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D16: "
729                         "Unknown node type %u with GUID 0x%" PRIx64 "\n",
730                         p_ni->node_type, cl_ntoh64(p_ni->node_guid));
731                 break;
732         }
733
734 Exit:
735         OSM_LOG_EXIT(sm->p_log);
736 }
737
738 /**********************************************************************
739  The plock must be held before calling this function.
740 **********************************************************************/
741 static void
742 __osm_ni_rcv_process_existing(IN osm_sm_t * sm,
743                               IN osm_node_t * const p_node,
744                               IN const osm_madw_t * const p_madw)
745 {
746         ib_node_info_t *p_ni;
747         ib_smp_t *p_smp;
748         osm_ni_context_t *p_ni_context;
749         uint8_t port_num;
750
751         OSM_LOG_ENTER(sm->p_log);
752
753         p_smp = osm_madw_get_smp_ptr(p_madw);
754         p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp);
755         p_ni_context = osm_madw_get_ni_context_ptr(p_madw);
756         port_num = ib_node_info_get_local_port_num(p_ni);
757
758         OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
759                 "Rediscovered %s node 0x%" PRIx64 " TID 0x%" PRIx64
760                 ", discovered %u times already\n",
761                 ib_get_node_type_str(p_ni->node_type),
762                 cl_ntoh64(p_ni->node_guid),
763                 cl_ntoh64(p_smp->trans_id), p_node->discovery_count);
764
765         /*
766            If we haven't already encountered this existing node
767            on this particular sweep, then process further.
768          */
769         p_node->discovery_count++;
770
771         switch (p_ni->node_type) {
772         case IB_NODE_TYPE_CA:
773         case IB_NODE_TYPE_ROUTER:
774                 __osm_ni_rcv_process_existing_ca_or_router(sm, p_node,
775                                                            p_madw);
776                 break;
777
778         case IB_NODE_TYPE_SWITCH:
779                 __osm_ni_rcv_process_existing_switch(sm, p_node, p_madw);
780                 break;
781
782         default:
783                 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D09: "
784                         "Unknown node type %u with GUID 0x%" PRIx64 "\n",
785                         p_ni->node_type, cl_ntoh64(p_ni->node_guid));
786                 break;
787         }
788
789         __osm_ni_rcv_set_links(sm, p_node, port_num, p_ni_context);
790
791         OSM_LOG_EXIT(sm->p_log);
792 }
793
794 /**********************************************************************
795  **********************************************************************/
796 void osm_ni_rcv_process(IN void *context, IN void *data)
797 {
798         osm_sm_t *sm = context;
799         osm_madw_t *p_madw = data;
800         ib_node_info_t *p_ni;
801         ib_smp_t *p_smp;
802         osm_node_t *p_node;
803
804         CL_ASSERT(sm);
805
806         OSM_LOG_ENTER(sm->p_log);
807
808         CL_ASSERT(p_madw);
809
810         p_smp = osm_madw_get_smp_ptr(p_madw);
811         p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp);
812
813         CL_ASSERT(p_smp->attr_id == IB_MAD_ATTR_NODE_INFO);
814
815         if (p_ni->node_guid == 0) {
816                 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D16: "
817                         "Got Zero Node GUID! Found on the directed route:\n");
818                 osm_dump_smp_dr_path(sm->p_log, p_smp, OSM_LOG_ERROR);
819                 goto Exit;
820         }
821
822         if (p_ni->port_guid == 0) {
823                 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D17: "
824                         "Got Zero Port GUID! Found on the directed route:\n");
825                 osm_dump_smp_dr_path(sm->p_log, p_smp, OSM_LOG_ERROR);
826                 goto Exit;
827         }
828
829         /*
830            Determine if this node has already been discovered,
831            and process accordingly.
832            During processing of this node, hold the shared lock.
833          */
834
835         CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
836         p_node = osm_get_node_by_guid(sm->p_subn, p_ni->node_guid);
837
838         osm_dump_node_info(sm->p_log, p_ni, OSM_LOG_DEBUG);
839
840         if (!p_node)
841                 __osm_ni_rcv_process_new(sm, p_madw);
842         else
843                 __osm_ni_rcv_process_existing(sm, p_node, p_madw);
844
845         CL_PLOCK_RELEASE(sm->p_lock);
846
847 Exit:
848         OSM_LOG_EXIT(sm->p_log);
849 }