]> CyberLeo.Net >> Repos - FreeBSD/releng/10.0.git/blob - contrib/ofed/management/opensm/opensm/osm_ucast_mgr.c
- Copy stable/10 (r259064) to releng/10.0 as part of the
[FreeBSD/releng/10.0.git] / contrib / ofed / management / opensm / opensm / osm_ucast_mgr.c
1 /*
2  * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
3  * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved.
4  * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  *
34  */
35
36 /*
37  * Abstract:
38  *    Implementation of osm_ucast_mgr_t.
39  * This file implements the Unicast Manager object.
40  */
41
42 #if HAVE_CONFIG_H
43 #  include <config.h>
44 #endif                          /* HAVE_CONFIG_H */
45
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <iba/ib_types.h>
50 #include <complib/cl_qmap.h>
51 #include <complib/cl_debug.h>
52 #include <complib/cl_qlist.h>
53 #include <opensm/osm_ucast_mgr.h>
54 #include <opensm/osm_sm.h>
55 #include <opensm/osm_log.h>
56 #include <opensm/osm_node.h>
57 #include <opensm/osm_switch.h>
58 #include <opensm/osm_helper.h>
59 #include <opensm/osm_msgdef.h>
60 #include <opensm/osm_opensm.h>
61
62 /**********************************************************************
63  **********************************************************************/
64 void osm_ucast_mgr_construct(IN osm_ucast_mgr_t * const p_mgr)
65 {
66         memset(p_mgr, 0, sizeof(*p_mgr));
67 }
68
69 /**********************************************************************
70  **********************************************************************/
71 void osm_ucast_mgr_destroy(IN osm_ucast_mgr_t * const p_mgr)
72 {
73         CL_ASSERT(p_mgr);
74
75         OSM_LOG_ENTER(p_mgr->p_log);
76
77         if (p_mgr->cache_valid)
78                 osm_ucast_cache_invalidate(p_mgr);
79
80         OSM_LOG_EXIT(p_mgr->p_log);
81 }
82
83 /**********************************************************************
84  **********************************************************************/
85 ib_api_status_t
86 osm_ucast_mgr_init(IN osm_ucast_mgr_t * const p_mgr, IN osm_sm_t * sm)
87 {
88         ib_api_status_t status = IB_SUCCESS;
89
90         OSM_LOG_ENTER(sm->p_log);
91
92         osm_ucast_mgr_construct(p_mgr);
93
94         p_mgr->sm = sm;
95         p_mgr->p_log = sm->p_log;
96         p_mgr->p_subn = sm->p_subn;
97         p_mgr->p_lock = sm->p_lock;
98
99         if (sm->p_subn->opt.use_ucast_cache)
100                 cl_qmap_init(&p_mgr->cache_sw_tbl);
101
102         OSM_LOG_EXIT(p_mgr->p_log);
103         return (status);
104 }
105
106 /**********************************************************************
107  Add each switch's own and neighbor LIDs to its LID matrix
108 **********************************************************************/
109 static void
110 __osm_ucast_mgr_process_hop_0_1(IN cl_map_item_t * const p_map_item,
111                                 IN void *context)
112 {
113         osm_switch_t *const p_sw = (osm_switch_t *) p_map_item;
114         osm_node_t *p_remote_node;
115         uint16_t lid, remote_lid;
116         uint8_t i, remote_port;
117
118         lid = osm_node_get_base_lid(p_sw->p_node, 0);
119         lid = cl_ntoh16(lid);
120         osm_switch_set_hops(p_sw, lid, 0, 0);
121
122         for (i = 1; i < p_sw->num_ports; i++) {
123                 p_remote_node =
124                     osm_node_get_remote_node(p_sw->p_node, i, &remote_port);
125
126                 if (p_remote_node && p_remote_node->sw &&
127                     (p_remote_node != p_sw->p_node)) {
128                         remote_lid = osm_node_get_base_lid(p_remote_node, 0);
129                         remote_lid = cl_ntoh16(remote_lid);
130                         osm_switch_set_hops(p_sw, remote_lid, i, 1);
131                         osm_switch_set_hops(p_remote_node->sw, lid, remote_port,
132                                             1);
133                 }
134         }
135 }
136
137 /**********************************************************************
138  **********************************************************************/
139 static void
140 __osm_ucast_mgr_process_neighbor(IN osm_ucast_mgr_t * const p_mgr,
141                                  IN osm_switch_t * const p_this_sw,
142                                  IN osm_switch_t * const p_remote_sw,
143                                  IN const uint8_t port_num,
144                                  IN const uint8_t remote_port_num)
145 {
146         osm_switch_t *p_sw, *p_next_sw;
147         uint16_t lid_ho;
148         uint8_t hops;
149
150         OSM_LOG_ENTER(p_mgr->p_log);
151
152         OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
153                 "Node 0x%" PRIx64 ", remote node 0x%" PRIx64
154                 ", port %u, remote port %u\n",
155                 cl_ntoh64(osm_node_get_node_guid(p_this_sw->p_node)),
156                 cl_ntoh64(osm_node_get_node_guid(p_remote_sw->p_node)),
157                 port_num, remote_port_num);
158
159         p_next_sw = (osm_switch_t *) cl_qmap_head(&p_mgr->p_subn->sw_guid_tbl);
160         while (p_next_sw !=
161                (osm_switch_t *) cl_qmap_end(&p_mgr->p_subn->sw_guid_tbl)) {
162                 p_sw = p_next_sw;
163                 p_next_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item);
164                 lid_ho = osm_node_get_base_lid(p_sw->p_node, 0);
165                 lid_ho = cl_ntoh16(lid_ho);
166                 hops = osm_switch_get_least_hops(p_remote_sw, lid_ho);
167                 if (hops == OSM_NO_PATH)
168                         continue;
169                 hops++;
170                 if (hops <
171                     osm_switch_get_hop_count(p_this_sw, lid_ho, port_num)) {
172                         if (osm_switch_set_hops
173                             (p_this_sw, lid_ho, port_num, hops) != 0)
174                                 OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
175                                         "cannot set hops for lid %u at switch 0x%"
176                                         PRIx64 "\n", lid_ho,
177                                         cl_ntoh64(osm_node_get_node_guid
178                                                   (p_this_sw->p_node)));
179                         p_mgr->some_hop_count_set = TRUE;
180                 }
181         }
182
183         OSM_LOG_EXIT(p_mgr->p_log);
184 }
185
186 /**********************************************************************
187  **********************************************************************/
188 static struct osm_remote_node *
189 find_and_add_remote_sys(osm_switch_t *sw, uint8_t port,
190                         struct osm_remote_guids_count *r)
191 {
192         unsigned i;
193         osm_physp_t *p = osm_node_get_physp_ptr(sw->p_node, port);
194         osm_node_t *node = p->p_remote_physp->p_node;
195
196         for (i = 0; i < r->count; i++)
197                 if (r->guids[i].node == node)
198                         return &r->guids[i];
199
200         r->guids[i].node = node;
201         r->guids[i].forwarded_to = 0;
202         r->count++;
203         return &r->guids[i];
204 }
205
206 static void
207 __osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const p_mgr,
208                              IN osm_switch_t * const p_sw,
209                              IN osm_port_t * const p_port,
210                              IN unsigned lid_offset)
211 {
212         uint16_t min_lid_ho;
213         uint16_t max_lid_ho;
214         uint16_t lid_ho;
215         uint8_t port;
216         boolean_t is_ignored_by_port_prof;
217         ib_net64_t node_guid;
218         unsigned start_from = 1;
219
220         OSM_LOG_ENTER(p_mgr->p_log);
221
222         osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho);
223
224         /* If the lids are zero - then there was some problem with
225          * the initialization. Don't handle this port. */
226         if (min_lid_ho == 0 || max_lid_ho == 0) {
227                 OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A04: "
228                         "Port 0x%" PRIx64 " has LID 0. An initialization "
229                         "error occurred. Ignoring port\n",
230                         cl_ntoh64(osm_port_get_guid(p_port)));
231                 goto Exit;
232         }
233
234         lid_ho = min_lid_ho + lid_offset;
235
236         if (lid_ho > max_lid_ho)
237                 goto Exit;
238
239         if (lid_offset)
240                 /* ignore potential overflow - it is handled in osm_switch.c */
241                 start_from = osm_switch_get_port_by_lid(p_sw, lid_ho - 1) + 1;
242
243         OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
244                 "Processing port 0x%" PRIx64 " (\'%s\' port %u), LID %u [%u,%u]\n",
245                 cl_ntoh64(osm_port_get_guid(p_port)),
246                 p_port->p_node->print_desc, p_port->p_physp->port_num,
247                 lid_ho, min_lid_ho, max_lid_ho);
248
249         /* TODO - This should be runtime error, not a CL_ASSERT() */
250         CL_ASSERT(max_lid_ho <= IB_LID_UCAST_END_HO);
251
252         node_guid = osm_node_get_node_guid(p_sw->p_node);
253
254         /*
255            The lid matrix contains the number of hops to each
256            lid from each port.  From this information we determine
257            how best to distribute the LID range across the ports
258            that can reach those LIDs.
259          */
260         port = osm_switch_recommend_path(p_sw, p_port, lid_ho, start_from,
261                                          p_mgr->p_subn->ignore_existing_lfts,
262                                          p_mgr->is_dor);
263
264         if (port == OSM_NO_PATH) {
265                 /* do not try to overwrite the ppro of non existing port ... */
266                 is_ignored_by_port_prof = TRUE;
267
268                 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
269                         "No path to get to LID %u from switch 0x%" PRIx64 "\n",
270                         lid_ho, cl_ntoh64(node_guid));
271         } else {
272                 osm_physp_t *p = osm_node_get_physp_ptr(p_sw->p_node, port);
273
274                 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
275                         "Routing LID %u to port %u"
276                         " for switch 0x%" PRIx64 "\n",
277                         lid_ho, port, cl_ntoh64(node_guid));
278
279                 /*
280                    we would like to optionally ignore this port in equalization
281                    as in the case of the Mellanox Anafa Internal PCI TCA port
282                  */
283                 is_ignored_by_port_prof = p->is_prof_ignored;
284
285                 /*
286                    We also would ignore this route if the target lid is of
287                    a switch and the port_profile_switch_node is not TRUE
288                  */
289                 if (!p_mgr->p_subn->opt.port_profile_switch_nodes)
290                         is_ignored_by_port_prof |=
291                             (osm_node_get_type(p_port->p_node) ==
292                              IB_NODE_TYPE_SWITCH);
293         }
294
295         /*
296            We have selected the port for this LID.
297            Write it to the forwarding tables.
298          */
299         p_sw->new_lft[lid_ho] = port;
300         if (!is_ignored_by_port_prof) {
301                 struct osm_remote_node *rem_node_used;
302                 osm_switch_count_path(p_sw, port);
303                 if (port > 0 && p_port->priv &&
304                     (rem_node_used = find_and_add_remote_sys(p_sw, port,
305                                                              p_port->priv)))
306                         rem_node_used->forwarded_to++;
307         }
308
309 Exit:
310         OSM_LOG_EXIT(p_mgr->p_log);
311 }
312
313 /**********************************************************************
314  **********************************************************************/
315 int osm_ucast_mgr_set_fwd_table(IN osm_ucast_mgr_t * const p_mgr,
316                                 IN osm_switch_t * const p_sw)
317 {
318         osm_node_t *p_node;
319         osm_dr_path_t *p_path;
320         osm_madw_context_t context;
321         ib_api_status_t status;
322         ib_switch_info_t si;
323         uint16_t block_id_ho = 0;
324         uint8_t block[IB_SMP_DATA_SIZE];
325         boolean_t set_swinfo_require = FALSE;
326         uint16_t lin_top;
327         uint8_t life_state;
328
329         CL_ASSERT(p_mgr);
330
331         OSM_LOG_ENTER(p_mgr->p_log);
332
333         CL_ASSERT(p_sw);
334
335         p_node = p_sw->p_node;
336
337         CL_ASSERT(p_node);
338
339         p_path = osm_physp_get_dr_path_ptr(osm_node_get_physp_ptr(p_node, 0));
340
341         /*
342            Set the top of the unicast forwarding table.
343          */
344         si = p_sw->switch_info;
345         lin_top = cl_hton16(p_sw->max_lid_ho);
346         if (lin_top != si.lin_top) {
347                 set_swinfo_require = TRUE;
348                 si.lin_top = lin_top;
349         }
350
351         /* check to see if the change state bit is on. If it is - then we
352            need to clear it. */
353         if (ib_switch_info_get_state_change(&si))
354                 life_state = ((p_mgr->p_subn->opt.packet_life_time << 3)
355                               | (si.life_state & IB_SWITCH_PSC)) & 0xfc;
356         else
357                 life_state = (p_mgr->p_subn->opt.packet_life_time << 3) & 0xf8;
358
359         if ((life_state != si.life_state)
360             || ib_switch_info_get_state_change(&si)) {
361                 set_swinfo_require = TRUE;
362                 si.life_state = life_state;
363         }
364
365         if (set_swinfo_require) {
366                 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
367                         "Setting switch FT top to LID %u\n", p_sw->max_lid_ho);
368
369                 context.si_context.light_sweep = FALSE;
370                 context.si_context.node_guid = osm_node_get_node_guid(p_node);
371                 context.si_context.set_method = TRUE;
372
373                 status = osm_req_set(p_mgr->sm, p_path, (uint8_t *) & si,
374                                      sizeof(si),
375                                      IB_MAD_ATTR_SWITCH_INFO,
376                                      0, CL_DISP_MSGID_NONE, &context);
377
378                 if (status != IB_SUCCESS)
379                         OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A06: "
380                                 "Sending SwitchInfo attribute failed (%s)\n",
381                                 ib_get_err_str(status));
382         }
383
384         /*
385            Send linear forwarding table blocks to the switch
386            as long as the switch indicates it has blocks needing
387            configuration.
388          */
389
390         context.lft_context.node_guid = osm_node_get_node_guid(p_node);
391         context.lft_context.set_method = TRUE;
392
393         if (!p_sw->new_lft) {
394                 /* any routing should provide the new_lft */
395                 CL_ASSERT(p_mgr->p_subn->opt.use_ucast_cache &&
396                           p_mgr->cache_valid && !p_sw->need_update);
397                 goto Exit;
398         }
399
400         for (block_id_ho = 0;
401              osm_switch_get_lft_block(p_sw, block_id_ho, block);
402              block_id_ho++) {
403                 if (!p_sw->need_update &&
404                     !memcmp(block,
405                             p_sw->new_lft + block_id_ho * IB_SMP_DATA_SIZE,
406                             IB_SMP_DATA_SIZE))
407                         continue;
408
409                 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
410                         "Writing FT block %u\n", block_id_ho);
411
412                 status = osm_req_set(p_mgr->sm, p_path,
413                                      p_sw->new_lft + block_id_ho * IB_SMP_DATA_SIZE,
414                                      sizeof(block),
415                                      IB_MAD_ATTR_LIN_FWD_TBL,
416                                      cl_hton32(block_id_ho),
417                                      CL_DISP_MSGID_NONE, &context);
418
419                 if (status != IB_SUCCESS)
420                         OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A05: "
421                                 "Sending linear fwd. tbl. block failed (%s)\n",
422                                 ib_get_err_str(status));
423         }
424
425 Exit:
426         OSM_LOG_EXIT(p_mgr->p_log);
427         return 0;
428 }
429
430 /**********************************************************************
431  **********************************************************************/
432 static void alloc_ports_priv(osm_ucast_mgr_t *mgr)
433 {
434         cl_qmap_t *port_tbl = &mgr->p_subn->port_guid_tbl;
435         struct osm_remote_guids_count *r;
436         osm_port_t *port;
437         cl_map_item_t *item;
438         unsigned lmc;
439
440         for (item = cl_qmap_head(port_tbl); item != cl_qmap_end(port_tbl);
441              item = cl_qmap_next(item)) {
442                 port = (osm_port_t *)item;
443                 lmc = ib_port_info_get_lmc(&port->p_physp->port_info);
444                 if (!lmc)
445                         continue;
446                 r = malloc(sizeof(*r) + sizeof(r->guids[0]) * (1 << lmc));
447                 if (!r) {
448                         OSM_LOG(mgr->p_log, OSM_LOG_ERROR, "ERR 3A09: "
449                                 "cannot allocate memory to track remote"
450                                 " systems for lmc > 0\n");
451                         port->priv = NULL;
452                         continue;
453                 }
454                 memset(r, 0, sizeof(*r) + sizeof(r->guids[0]) * (1 << lmc));
455                 port->priv = r;
456         }
457 }
458
459 static void free_ports_priv(osm_ucast_mgr_t *mgr)
460 {
461         cl_qmap_t *port_tbl = &mgr->p_subn->port_guid_tbl;
462         osm_port_t *port;
463         cl_map_item_t *item;
464         for (item = cl_qmap_head(port_tbl); item != cl_qmap_end(port_tbl);
465              item = cl_qmap_next(item)) {
466                 port = (osm_port_t *)item;
467                 if (port->priv) {
468                         free(port->priv);
469                         port->priv = NULL;
470                 }
471         }
472 }
473
474 static void
475 __osm_ucast_mgr_process_tbl(IN cl_map_item_t * const p_map_item,
476                             IN void *context)
477 {
478         osm_ucast_mgr_t *p_mgr = context;
479         osm_switch_t *const p_sw = (osm_switch_t *) p_map_item;
480         unsigned i, lids_per_port;
481
482         OSM_LOG_ENTER(p_mgr->p_log);
483
484         CL_ASSERT(p_sw && p_sw->p_node);
485
486         OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
487                 "Processing switch 0x%" PRIx64 "\n",
488                 cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)));
489
490         /* Initialize LIDs in buffer to invalid port number. */
491         memset(p_sw->new_lft, OSM_NO_PATH, IB_LID_UCAST_END_HO + 1);
492
493         if (p_mgr->p_subn->opt.lmc)
494                 alloc_ports_priv(p_mgr);
495
496         /*
497            Iterate through every port setting LID routes for each
498            port based on base LID and LMC value.
499          */
500         lids_per_port = 1 << p_mgr->p_subn->opt.lmc;
501         for (i = 0; i < lids_per_port; i++) {
502                 cl_qlist_t *list = &p_mgr->port_order_list;
503                 cl_list_item_t *item;
504                 for (item = cl_qlist_head(list); item != cl_qlist_end(list);
505                      item = cl_qlist_next(item)) {
506                         osm_port_t *port = cl_item_obj(item, port, list_item);
507                         __osm_ucast_mgr_process_port(p_mgr, p_sw, port, i);
508                 }
509         }
510
511         osm_ucast_mgr_set_fwd_table(p_mgr, p_sw);
512
513         if (p_mgr->p_subn->opt.lmc)
514                 free_ports_priv(p_mgr);
515
516         OSM_LOG_EXIT(p_mgr->p_log);
517 }
518
519 /**********************************************************************
520  **********************************************************************/
521 static void
522 __osm_ucast_mgr_process_neighbors(IN cl_map_item_t * const p_map_item,
523                                   IN void *context)
524 {
525         osm_switch_t *const p_sw = (osm_switch_t *) p_map_item;
526         osm_ucast_mgr_t *const p_mgr = (osm_ucast_mgr_t *) context;
527         osm_node_t *p_node;
528         osm_node_t *p_remote_node;
529         uint32_t port_num;
530         uint8_t remote_port_num;
531         uint32_t num_ports;
532         osm_physp_t *p_physp;
533
534         OSM_LOG_ENTER(p_mgr->p_log);
535
536         p_node = p_sw->p_node;
537
538         CL_ASSERT(p_node);
539         CL_ASSERT(osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH);
540
541         OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
542                 "Processing switch with GUID 0x%" PRIx64 "\n",
543                 cl_ntoh64(osm_node_get_node_guid(p_node)));
544
545         num_ports = osm_node_get_num_physp(p_node);
546
547         /*
548            Start with port 1 to skip the switch's management port.
549          */
550         for (port_num = 1; port_num < num_ports; port_num++) {
551                 p_remote_node = osm_node_get_remote_node(p_node,
552                                                          (uint8_t) port_num,
553                                                          &remote_port_num);
554
555                 if (p_remote_node && p_remote_node->sw
556                     && (p_remote_node != p_node)) {
557                         /* make sure the link is healthy. If it is not - don't
558                            propagate through it. */
559                         p_physp = osm_node_get_physp_ptr(p_node, port_num);
560                         if (!p_physp || !osm_link_is_healthy(p_physp))
561                                 continue;
562
563                         __osm_ucast_mgr_process_neighbor(p_mgr, p_sw,
564                                                          p_remote_node->sw,
565                                                          (uint8_t) port_num,
566                                                          remote_port_num);
567
568                 }
569         }
570
571         OSM_LOG_EXIT(p_mgr->p_log);
572 }
573
574 /**********************************************************************
575  **********************************************************************/
576 int osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * const p_mgr)
577 {
578         uint32_t i;
579         uint32_t iteration_max;
580         cl_qmap_t *p_sw_guid_tbl;
581
582         p_sw_guid_tbl = &p_mgr->p_subn->sw_guid_tbl;
583
584         OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE,
585                 "Starting switches' Min Hop Table Assignment\n");
586
587         /*
588            Set the switch matrices for each switch's own port 0 LID(s)
589            then set the lid matrices for the each switch's leaf nodes.
590          */
591         cl_qmap_apply_func(p_sw_guid_tbl,
592                            __osm_ucast_mgr_process_hop_0_1, p_mgr);
593
594         /*
595            Get the switch matrices for each switch's neighbors.
596            This process requires a number of iterations equal to
597            the number of switches in the subnet minus 1.
598
599            In each iteration, a switch learns the lid/port/hop
600            information (as contained by a switch's lid matrix) from
601            its immediate neighbors.  After each iteration, a switch
602            (and it's neighbors) know more routing information than
603            it did on the previous iteration.
604            Thus, by repeatedly absorbing the routing information of
605            neighbor switches, every switch eventually learns how to
606            route all LIDs on the subnet.
607
608            Note that there may not be any switches in the subnet if
609            we are in simple p2p configuration.
610          */
611         iteration_max = cl_qmap_count(p_sw_guid_tbl);
612
613         /*
614            If there are switches in the subnet, iterate until the lid
615            matrix has been constructed.  Otherwise, just immediately
616            indicate we're done if no switches exist.
617          */
618         if (iteration_max) {
619                 iteration_max--;
620
621                 /*
622                    we need to find out when the propagation of
623                    hop counts has relaxed. So this global variable
624                    is preset to 0 on each iteration and if
625                    if non of the switches was set will exit the
626                    while loop
627                  */
628                 p_mgr->some_hop_count_set = TRUE;
629                 for (i = 0; (i < iteration_max) && p_mgr->some_hop_count_set;
630                      i++) {
631                         p_mgr->some_hop_count_set = FALSE;
632                         cl_qmap_apply_func(p_sw_guid_tbl,
633                                            __osm_ucast_mgr_process_neighbors,
634                                            p_mgr);
635                 }
636                 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
637                         "Min-hop propagated in %d steps\n", i);
638         }
639
640         return 0;
641 }
642
643 /**********************************************************************
644  **********************************************************************/
645 static int ucast_mgr_setup_all_switches(osm_subn_t * p_subn)
646 {
647         osm_switch_t *p_sw;
648         uint16_t lids;
649
650         lids = (uint16_t) cl_ptr_vector_get_size(&p_subn->port_lid_tbl);
651         lids = lids ? lids - 1 : 0;
652
653         for (p_sw = (osm_switch_t *) cl_qmap_head(&p_subn->sw_guid_tbl);
654              p_sw != (osm_switch_t *) cl_qmap_end(&p_subn->sw_guid_tbl);
655              p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item))
656                 if (osm_switch_prepare_path_rebuild(p_sw, lids)) {
657                         OSM_LOG(&p_subn->p_osm->log, OSM_LOG_ERROR, "ERR 3A0B: "
658                                 "cannot setup switch 0x%016" PRIx64 "\n",
659                                 cl_ntoh64(osm_node_get_node_guid
660                                           (p_sw->p_node)));
661                         return -1;
662                 }
663
664         return 0;
665 }
666
667 /**********************************************************************
668  **********************************************************************/
669
670 static int add_guid_to_order_list(void *ctx, uint64_t guid, char *p)
671 {
672         osm_ucast_mgr_t *m = ctx;
673         osm_port_t *port = osm_get_port_by_guid(m->p_subn, cl_hton64(guid));
674
675         if (!port) {
676                 OSM_LOG(m->p_log, OSM_LOG_DEBUG,
677                         "port guid not found: 0x%016" PRIx64 "\n", guid);
678                 return 0;
679         }
680
681         if (port->flag) {
682                 OSM_LOG(m->p_log, OSM_LOG_DEBUG,
683                         "port guid specified multiple times 0x%016" PRIx64 "\n",
684                         guid);
685                 return 0;
686         }
687
688         cl_qlist_insert_tail(&m->port_order_list, &port->list_item);
689         port->flag = 1;
690
691         return 0;
692 }
693
694 static void add_port_to_order_list(cl_map_item_t * const p_map_item, void *ctx)
695 {
696         osm_port_t *port = (osm_port_t *)p_map_item;
697         osm_ucast_mgr_t *m = ctx;
698
699         if (!port->flag)
700                 cl_qlist_insert_tail(&m->port_order_list, &port->list_item);
701         else
702                 port->flag = 0;
703 }
704
705 static int mark_ignored_port(void *ctx, uint64_t guid, char *p)
706 {
707         osm_ucast_mgr_t *m = ctx;
708         osm_node_t *node = osm_get_node_by_guid(m->p_subn, cl_hton64(guid));
709         osm_physp_t *physp;
710         unsigned port;
711
712         if (!node || !node->sw) {
713                 OSM_LOG(m->p_log, OSM_LOG_DEBUG,
714                         "switch with guid 0x%016" PRIx64 " is not found\n",
715                         guid);
716                 return 0;
717         }
718
719         if (!p || !*p || !(port = strtoul(p, NULL, 0)) ||
720             port >= node->sw->num_ports) {
721                 OSM_LOG(m->p_log, OSM_LOG_DEBUG,
722                         "bad port specified for guid 0x%016" PRIx64 "\n", guid);
723                 return 0;
724         }
725
726         physp = osm_node_get_physp_ptr(node, port);
727         if (!physp)
728                 return 0;
729
730         physp->is_prof_ignored = 1;
731
732         return 0;
733 }
734
735 static void clear_prof_ignore_flag(cl_map_item_t * const p_map_item, void *ctx)
736 {
737         osm_switch_t *sw = (osm_switch_t *)p_map_item;
738         int i;
739
740         for (i = 1; i < sw->num_ports; i++) {
741                 osm_physp_t *p = osm_node_get_physp_ptr(sw->p_node, i);
742                 if (p)
743                         p->is_prof_ignored = 0;
744         }
745 }
746
747 static int ucast_mgr_build_lfts(osm_ucast_mgr_t *p_mgr)
748 {
749         cl_qlist_init(&p_mgr->port_order_list);
750
751         if (p_mgr->p_subn->opt.guid_routing_order_file) {
752                 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
753                         "Fetching guid routing order file \'%s\'\n",
754                         p_mgr->p_subn->opt.guid_routing_order_file);
755
756                 if (parse_node_map(p_mgr->p_subn->opt.guid_routing_order_file,
757                                    add_guid_to_order_list, p_mgr))
758                         OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR : "
759                                 "cannot parse guid routing order file \'%s\'\n",
760                                 p_mgr->p_subn->opt.guid_routing_order_file);
761         }
762
763         if (p_mgr->p_subn->opt.port_prof_ignore_file) {
764                 cl_qmap_apply_func(&p_mgr->p_subn->sw_guid_tbl,
765                                    clear_prof_ignore_flag, NULL);
766                 if (parse_node_map(p_mgr->p_subn->opt.port_prof_ignore_file,
767                                    mark_ignored_port, p_mgr)) {
768                         OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR : "
769                                 "cannot parse port prof ignore file \'%s\'\n",
770                                 p_mgr->p_subn->opt.port_prof_ignore_file);
771                 }
772         }
773
774         cl_qmap_apply_func(&p_mgr->p_subn->port_guid_tbl,
775                            add_port_to_order_list, p_mgr);
776
777         cl_qmap_apply_func(&p_mgr->p_subn->sw_guid_tbl,
778                            __osm_ucast_mgr_process_tbl, p_mgr);
779
780         cl_qlist_remove_all(&p_mgr->port_order_list);
781
782         return 0;
783 }
784
785 /**********************************************************************
786  **********************************************************************/
787 static int ucast_mgr_route(struct osm_routing_engine *r, osm_opensm_t *osm)
788 {
789         int ret;
790
791         OSM_LOG(&osm->log, OSM_LOG_VERBOSE,
792                 "building routing with \'%s\' routing algorithm...\n", r->name);
793
794         if (!r->build_lid_matrices ||
795             (ret = r->build_lid_matrices(r->context)) > 0)
796                 ret = osm_ucast_mgr_build_lid_matrices(&osm->sm.ucast_mgr);
797
798         if (ret < 0) {
799                 OSM_LOG(&osm->log, OSM_LOG_ERROR,
800                         "%s: cannot build lid matrices.\n", r->name);
801                 return ret;
802         }
803
804         if (!r->ucast_build_fwd_tables ||
805             (ret = r->ucast_build_fwd_tables(r->context)) > 0)
806                 ret = ucast_mgr_build_lfts(&osm->sm.ucast_mgr);
807
808         if (ret < 0) {
809                 OSM_LOG(&osm->log, OSM_LOG_ERROR,
810                         "%s: cannot build fwd tables.\n", r->name);
811                 return ret;
812         }
813
814         osm->routing_engine_used = osm_routing_engine_type(r->name);
815
816         return 0;
817 }
818
819 int osm_ucast_mgr_process(IN osm_ucast_mgr_t * const p_mgr)
820 {
821         osm_opensm_t *p_osm;
822         struct osm_routing_engine *p_routing_eng;
823         cl_qmap_t *p_sw_guid_tbl;
824
825         OSM_LOG_ENTER(p_mgr->p_log);
826
827         p_sw_guid_tbl = &p_mgr->p_subn->sw_guid_tbl;
828         p_osm = p_mgr->p_subn->p_osm;
829         p_routing_eng = p_osm->routing_engine_list;
830
831         CL_PLOCK_EXCL_ACQUIRE(p_mgr->p_lock);
832
833         /*
834            If there are no switches in the subnet, we are done.
835          */
836         if (cl_qmap_count(p_sw_guid_tbl) == 0 ||
837             ucast_mgr_setup_all_switches(p_mgr->p_subn) < 0)
838                 goto Exit;
839
840         p_osm->routing_engine_used = OSM_ROUTING_ENGINE_TYPE_NONE;
841         while (p_routing_eng) {
842                 if (!ucast_mgr_route(p_routing_eng, p_osm))
843                         break;
844                 p_routing_eng = p_routing_eng->next;
845         }
846
847         if (p_osm->routing_engine_used == OSM_ROUTING_ENGINE_TYPE_NONE) {
848                 /* If configured routing algorithm failed, use default MinHop */
849                 osm_ucast_mgr_build_lid_matrices(p_mgr);
850                 ucast_mgr_build_lfts(p_mgr);
851                 p_osm->routing_engine_used = OSM_ROUTING_ENGINE_TYPE_MINHOP;
852         }
853
854         OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
855                 "%s tables configured on all switches\n",
856                 osm_routing_engine_type_str(p_osm->routing_engine_used));
857
858         if (p_mgr->p_subn->opt.use_ucast_cache)
859                 p_mgr->cache_valid = TRUE;
860
861 Exit:
862         CL_PLOCK_RELEASE(p_mgr->p_lock);
863         OSM_LOG_EXIT(p_mgr->p_log);
864         return 0;
865 }
866
/*
 * Routing-engine callback adapter: forwards the opaque context to
 * osm_ucast_mgr_build_lid_matrices() and returns its result unchanged.
 */
static int ucast_build_lid_matrices(void *context)
{
        int rc = osm_ucast_mgr_build_lid_matrices(context);

        return rc;
}
871
/*
 * Routing-engine callback adapter: forwards the opaque context to
 * ucast_mgr_build_lfts() and returns its result unchanged.
 */
static int ucast_build_lfts(void *context)
{
        int rc = ucast_mgr_build_lfts(context);

        return rc;
}
876
877 int osm_ucast_minhop_setup(struct osm_routing_engine *r, osm_opensm_t *osm)
878 {
879         r->context = &osm->sm.ucast_mgr;
880         r->build_lid_matrices = ucast_build_lid_matrices;
881         r->ucast_build_fwd_tables = ucast_build_lfts;
882         return 0;
883 }
884
885 static int ucast_dor_build_lfts(void *context)
886 {
887         osm_ucast_mgr_t *mgr = context;
888         int ret;
889
890         mgr->is_dor = 1;
891         ret = ucast_mgr_build_lfts(mgr);
892         mgr->is_dor = 0;
893
894         return ret;
895 }
896
897 int osm_ucast_dor_setup(struct osm_routing_engine *r, osm_opensm_t *osm)
898 {
899         r->context = &osm->sm.ucast_mgr;
900         r->build_lid_matrices = ucast_build_lid_matrices;
901         r->ucast_build_fwd_tables = ucast_dor_build_lfts;
902         return 0;
903 }