]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/ofed/opensm/opensm/osm_ucast_mgr.c
MFV r357608: Limit memory usage in xz(1) instead of in tuklib.
[FreeBSD/FreeBSD.git] / contrib / ofed / opensm / opensm / osm_ucast_mgr.c
1 /*
2  * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
3  * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved.
4  * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  *
34  */
35
36 /*
37  * Abstract:
38  *    Implementation of osm_ucast_mgr_t.
39  * This file implements the Unicast Manager object.
40  */
41
42 #if HAVE_CONFIG_H
43 #  include <config.h>
44 #endif                          /* HAVE_CONFIG_H */
45
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <ctype.h>
50 #include <iba/ib_types.h>
51 #include <complib/cl_qmap.h>
52 #include <complib/cl_debug.h>
53 #include <complib/cl_qlist.h>
54 #include <opensm/osm_file_ids.h>
55 #define FILE_ID OSM_FILE_UCAST_MGR_C
56 #include <opensm/osm_ucast_mgr.h>
57 #include <opensm/osm_sm.h>
58 #include <opensm/osm_log.h>
59 #include <opensm/osm_node.h>
60 #include <opensm/osm_switch.h>
61 #include <opensm/osm_helper.h>
62 #include <opensm/osm_msgdef.h>
63 #include <opensm/osm_opensm.h>
64
65 void osm_ucast_mgr_construct(IN osm_ucast_mgr_t * p_mgr)
66 {
67         memset(p_mgr, 0, sizeof(*p_mgr));
68 }
69
70 void osm_ucast_mgr_destroy(IN osm_ucast_mgr_t * p_mgr)
71 {
72         CL_ASSERT(p_mgr);
73
74         OSM_LOG_ENTER(p_mgr->p_log);
75
76         if (p_mgr->cache_valid)
77                 osm_ucast_cache_invalidate(p_mgr);
78
79         OSM_LOG_EXIT(p_mgr->p_log);
80 }
81
82 ib_api_status_t osm_ucast_mgr_init(IN osm_ucast_mgr_t * p_mgr, IN osm_sm_t * sm)
83 {
84         ib_api_status_t status = IB_SUCCESS;
85
86         OSM_LOG_ENTER(sm->p_log);
87
88         osm_ucast_mgr_construct(p_mgr);
89
90         p_mgr->sm = sm;
91         p_mgr->p_log = sm->p_log;
92         p_mgr->p_subn = sm->p_subn;
93         p_mgr->p_lock = sm->p_lock;
94
95         if (sm->p_subn->opt.use_ucast_cache)
96                 cl_qmap_init(&p_mgr->cache_sw_tbl);
97
98         OSM_LOG_EXIT(p_mgr->p_log);
99         return status;
100 }
101
102 /**********************************************************************
103  Add each switch's own and neighbor LIDs to its LID matrix
104 **********************************************************************/
105 static void ucast_mgr_process_hop_0_1(IN cl_map_item_t * p_map_item,
106                                       IN void *context)
107 {
108         osm_switch_t * p_sw = (osm_switch_t *) p_map_item;
109         osm_node_t *p_remote_node;
110         uint16_t lid, remote_lid;
111         uint8_t i;
112
113         lid = cl_ntoh16(osm_node_get_base_lid(p_sw->p_node, 0));
114         osm_switch_set_hops(p_sw, lid, 0, 0);
115
116         for (i = 1; i < p_sw->num_ports; i++) {
117                 osm_physp_t *p = osm_node_get_physp_ptr(p_sw->p_node, i);
118                 p_remote_node = (p && p->p_remote_physp) ?
119                     p->p_remote_physp->p_node : NULL;
120
121                 if (p_remote_node && p_remote_node->sw &&
122                     p_remote_node != p_sw->p_node) {
123                         remote_lid = osm_node_get_base_lid(p_remote_node, 0);
124                         remote_lid = cl_ntoh16(remote_lid);
125                         osm_switch_set_hops(p_sw, remote_lid, i, p->hop_wf);
126                 }
127         }
128 }
129
130 static void ucast_mgr_process_neighbor(IN osm_ucast_mgr_t * p_mgr,
131                                        IN osm_switch_t * p_this_sw,
132                                        IN osm_switch_t * p_remote_sw,
133                                        IN uint8_t port_num,
134                                        IN uint8_t remote_port_num)
135 {
136         osm_switch_t *p_sw;
137         cl_map_item_t *item;
138         uint16_t lid_ho;
139         uint16_t hops;
140         osm_physp_t *p;
141
142         OSM_LOG_ENTER(p_mgr->p_log);
143
144         OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
145                 "Node 0x%" PRIx64 ", remote node 0x%" PRIx64
146                 ", port %u, remote port %u\n",
147                 cl_ntoh64(osm_node_get_node_guid(p_this_sw->p_node)),
148                 cl_ntoh64(osm_node_get_node_guid(p_remote_sw->p_node)),
149                 port_num, remote_port_num);
150
151         p = osm_node_get_physp_ptr(p_this_sw->p_node, port_num);
152
153         for (item = cl_qmap_head(&p_mgr->p_subn->sw_guid_tbl);
154              item != cl_qmap_end(&p_mgr->p_subn->sw_guid_tbl);
155              item = cl_qmap_next(item)) {
156                 p_sw = (osm_switch_t *) item;
157                 lid_ho = cl_ntoh16(osm_node_get_base_lid(p_sw->p_node, 0));
158                 hops = osm_switch_get_least_hops(p_remote_sw, lid_ho);
159                 if (hops == OSM_NO_PATH)
160                         continue;
161                 hops += p->hop_wf;
162                 if (hops <
163                     osm_switch_get_hop_count(p_this_sw, lid_ho, port_num)) {
164                         if (osm_switch_set_hops
165                             (p_this_sw, lid_ho, port_num, (uint8_t) hops) != 0)
166                                 OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A03: "
167                                         "cannot set hops for lid %u at switch 0x%"
168                                         PRIx64 "\n", lid_ho,
169                                         cl_ntoh64(osm_node_get_node_guid
170                                                   (p_this_sw->p_node)));
171                         p_mgr->some_hop_count_set = TRUE;
172                 }
173         }
174
175         OSM_LOG_EXIT(p_mgr->p_log);
176 }
177
178 static struct osm_remote_node *find_and_add_remote_sys(osm_switch_t * sw,
179                                                        uint8_t port,
180                                                        boolean_t dor, struct
181                                                        osm_remote_guids_count
182                                                        *r)
183 {
184         unsigned i;
185         osm_physp_t *p = osm_node_get_physp_ptr(sw->p_node, port);
186         osm_node_t *node = p->p_remote_physp->p_node;
187         uint8_t rem_port = osm_physp_get_port_num(p->p_remote_physp);
188
189         for (i = 0; i < r->count; i++)
190                 if (r->guids[i].node == node)
191                         if (!dor || (r->guids[i].port == rem_port))
192                                 return &r->guids[i];
193
194         r->guids[i].node = node;
195         r->guids[i].forwarded_to = 0;
196         r->guids[i].port = rem_port;
197         r->count++;
198         return &r->guids[i];
199 }
200
201 static void ucast_mgr_process_port(IN osm_ucast_mgr_t * p_mgr,
202                                    IN osm_switch_t * p_sw,
203                                    IN osm_port_t * p_port,
204                                    IN unsigned lid_offset)
205 {
206         uint16_t min_lid_ho;
207         uint16_t max_lid_ho;
208         uint16_t lid_ho;
209         uint8_t port;
210         boolean_t is_ignored_by_port_prof;
211         ib_net64_t node_guid;
212         unsigned start_from = 1;
213
214         OSM_LOG_ENTER(p_mgr->p_log);
215
216         osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho);
217
218         /* If the lids are zero - then there was some problem with
219          * the initialization. Don't handle this port. */
220         if (min_lid_ho == 0 || max_lid_ho == 0) {
221                 OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A04: "
222                         "Port 0x%" PRIx64 " (%s port %d) has LID 0. An "
223                         "initialization error occurred. Ignoring port\n",
224                         cl_ntoh64(osm_port_get_guid(p_port)),
225                         p_port->p_node->print_desc,
226                         p_port->p_physp->port_num);
227                 goto Exit;
228         }
229
230         lid_ho = min_lid_ho + lid_offset;
231
232         if (lid_ho > max_lid_ho)
233                 goto Exit;
234
235         if (lid_offset && !p_mgr->is_dor)
236                 /* ignore potential overflow - it is handled in osm_switch.c */
237                 start_from =
238                     osm_switch_get_port_by_lid(p_sw, lid_ho - 1, OSM_NEW_LFT) + 1;
239
240         OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
241                 "Processing port 0x%" PRIx64
242                 " (\'%s\' port %u), LID %u [%u,%u]\n",
243                 cl_ntoh64(osm_port_get_guid(p_port)),
244                 p_port->p_node->print_desc, p_port->p_physp->port_num, lid_ho,
245                 min_lid_ho, max_lid_ho);
246
247         /* TODO - This should be runtime error, not a CL_ASSERT() */
248         CL_ASSERT(max_lid_ho <= IB_LID_UCAST_END_HO);
249
250         node_guid = osm_node_get_node_guid(p_sw->p_node);
251
252         /*
253            The lid matrix contains the number of hops to each
254            lid from each port.  From this information we determine
255            how best to distribute the LID range across the ports
256            that can reach those LIDs.
257          */
258         port = osm_switch_recommend_path(p_sw, p_port, lid_ho, start_from,
259                                          p_mgr->p_subn->ignore_existing_lfts,
260                                          p_mgr->p_subn->opt.lmc,
261                                          p_mgr->is_dor,
262                                          p_mgr->p_subn->opt.port_shifting,
263                                          !lid_offset && p_port->use_scatter,
264                                          OSM_LFT);
265
266         if (port == OSM_NO_PATH) {
267                 /* do not try to overwrite the ppro of non existing port ... */
268                 is_ignored_by_port_prof = TRUE;
269
270                 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
271                         "No path to get to LID %u from switch 0x%" PRIx64 "\n",
272                         lid_ho, cl_ntoh64(node_guid));
273         } else {
274                 osm_physp_t *p = osm_node_get_physp_ptr(p_sw->p_node, port);
275                 if (!p)
276                         goto Exit;
277
278                 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
279                         "Routing LID %u to port %u for switch 0x%" PRIx64 "\n",
280                         lid_ho, port, cl_ntoh64(node_guid));
281
282                 /*
283                    we would like to optionally ignore this port in equalization
284                    as in the case of the Mellanox Anafa Internal PCI TCA port
285                  */
286                 is_ignored_by_port_prof = p->is_prof_ignored;
287
288                 /*
289                    We also would ignore this route if the target lid is of
290                    a switch and the port_profile_switch_node is not TRUE
291                  */
292                 if (!p_mgr->p_subn->opt.port_profile_switch_nodes)
293                         is_ignored_by_port_prof |=
294                             (osm_node_get_type(p_port->p_node) ==
295                              IB_NODE_TYPE_SWITCH);
296         }
297
298         /*
299            We have selected the port for this LID.
300            Write it to the forwarding tables.
301          */
302         p_sw->new_lft[lid_ho] = port;
303         if (!is_ignored_by_port_prof) {
304                 struct osm_remote_node *rem_node_used;
305                 osm_switch_count_path(p_sw, port);
306                 if (port > 0 && p_port->priv &&
307                     (rem_node_used = find_and_add_remote_sys(p_sw, port,
308                                                              p_mgr->is_dor,
309                                                              p_port->priv)))
310                         rem_node_used->forwarded_to++;
311         }
312
313 Exit:
314         OSM_LOG_EXIT(p_mgr->p_log);
315 }
316
317 static void alloc_ports_priv(osm_ucast_mgr_t * mgr)
318 {
319         cl_qmap_t *port_tbl = &mgr->p_subn->port_guid_tbl;
320         struct osm_remote_guids_count *r;
321         osm_port_t *port;
322         cl_map_item_t *item;
323         unsigned lmc;
324
325         for (item = cl_qmap_head(port_tbl); item != cl_qmap_end(port_tbl);
326              item = cl_qmap_next(item)) {
327                 port = (osm_port_t *) item;
328                 lmc = ib_port_info_get_lmc(&port->p_physp->port_info);
329                 r = malloc(sizeof(*r) + sizeof(r->guids[0]) * (1 << lmc));
330                 if (!r) {
331                         OSM_LOG(mgr->p_log, OSM_LOG_ERROR, "ERR 3A09: "
332                                 "cannot allocate memory to track remote"
333                                 " systems for lmc > 0\n");
334                         port->priv = NULL;
335                         continue;
336                 }
337                 memset(r, 0, sizeof(*r) + sizeof(r->guids[0]) * (1 << lmc));
338                 port->priv = r;
339         }
340 }
341
342 static void free_ports_priv(osm_ucast_mgr_t * mgr)
343 {
344         cl_qmap_t *port_tbl = &mgr->p_subn->port_guid_tbl;
345         osm_port_t *port;
346         cl_map_item_t *item;
347         for (item = cl_qmap_head(port_tbl); item != cl_qmap_end(port_tbl);
348              item = cl_qmap_next(item)) {
349                 port = (osm_port_t *) item;
350                 if (port->priv) {
351                         free(port->priv);
352                         port->priv = NULL;
353                 }
354         }
355 }
356
357 static void ucast_mgr_process_tbl(IN cl_map_item_t * p_map_item,
358                                   IN void *context)
359 {
360         osm_ucast_mgr_t *p_mgr = context;
361         osm_switch_t * p_sw = (osm_switch_t *) p_map_item;
362         unsigned i, lids_per_port;
363
364         OSM_LOG_ENTER(p_mgr->p_log);
365
366         CL_ASSERT(p_sw && p_sw->p_node);
367
368         OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
369                 "Processing switch 0x%" PRIx64 "\n",
370                 cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)));
371
372         /* Initialize LIDs in buffer to invalid port number. */
373         memset(p_sw->new_lft, OSM_NO_PATH, p_sw->max_lid_ho + 1);
374
375         alloc_ports_priv(p_mgr);
376
377         /*
378            Iterate through every port setting LID routes for each
379            port based on base LID and LMC value.
380          */
381         lids_per_port = 1 << p_mgr->p_subn->opt.lmc;
382         for (i = 0; i < lids_per_port; i++) {
383                 cl_qlist_t *list = &p_mgr->port_order_list;
384                 cl_list_item_t *item;
385                 for (item = cl_qlist_head(list); item != cl_qlist_end(list);
386                      item = cl_qlist_next(item)) {
387                         osm_port_t *port = cl_item_obj(item, port, list_item);
388                         ucast_mgr_process_port(p_mgr, p_sw, port, i);
389                 }
390         }
391
392         free_ports_priv(p_mgr);
393
394         OSM_LOG_EXIT(p_mgr->p_log);
395 }
396
397 static void ucast_mgr_process_neighbors(IN cl_map_item_t * p_map_item,
398                                         IN void *context)
399 {
400         osm_switch_t * p_sw = (osm_switch_t *) p_map_item;
401         osm_ucast_mgr_t * p_mgr = context;
402         osm_node_t *p_node;
403         osm_node_t *p_remote_node;
404         uint32_t port_num;
405         uint8_t remote_port_num;
406         uint32_t num_ports;
407         osm_physp_t *p_physp;
408
409         OSM_LOG_ENTER(p_mgr->p_log);
410
411         p_node = p_sw->p_node;
412
413         CL_ASSERT(p_node);
414         CL_ASSERT(osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH);
415
416         OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
417                 "Processing switch with GUID 0x%" PRIx64 "\n",
418                 cl_ntoh64(osm_node_get_node_guid(p_node)));
419
420         num_ports = osm_node_get_num_physp(p_node);
421
422         /*
423            Start with port 1 to skip the switch's management port.
424          */
425         for (port_num = 1; port_num < num_ports; port_num++) {
426                 p_remote_node = osm_node_get_remote_node(p_node,
427                                                          (uint8_t) port_num,
428                                                          &remote_port_num);
429                 if (p_remote_node && p_remote_node->sw
430                     && (p_remote_node != p_node)) {
431                         /* make sure the link is healthy. If it is not - don't
432                            propagate through it. */
433                         p_physp = osm_node_get_physp_ptr(p_node, port_num);
434                         if (!p_physp || !osm_link_is_healthy(p_physp))
435                                 continue;
436
437                         ucast_mgr_process_neighbor(p_mgr, p_sw,
438                                                    p_remote_node->sw,
439                                                    (uint8_t) port_num,
440                                                    remote_port_num);
441                 }
442         }
443
444         OSM_LOG_EXIT(p_mgr->p_log);
445 }
446
447 static int set_hop_wf(void *ctx, uint64_t guid, char *p)
448 {
449         osm_ucast_mgr_t *m = ctx;
450         osm_node_t *node = osm_get_node_by_guid(m->p_subn, cl_hton64(guid));
451         osm_physp_t *physp;
452         unsigned port, hop_wf;
453         char *e;
454
455         if (!node || !node->sw) {
456                 OSM_LOG(m->p_log, OSM_LOG_DEBUG,
457                         "switch with guid 0x%016" PRIx64 " is not found\n",
458                         guid);
459                 return 0;
460         }
461
462         if (!p || !*p || !(port = strtoul(p, &e, 0)) || (p == e) ||
463             port >= node->sw->num_ports) {
464                 OSM_LOG(m->p_log, OSM_LOG_DEBUG,
465                         "bad port specified for guid 0x%016" PRIx64 "\n", guid);
466                 return 0;
467         }
468
469         p = e + 1;
470
471         if (!*p || !(hop_wf = strtoul(p, &e, 0)) || p == e || hop_wf >= 0x100) {
472                 OSM_LOG(m->p_log, OSM_LOG_DEBUG,
473                         "bad hop weight factor specified for guid 0x%016" PRIx64
474                         "port %u\n", guid, port);
475                 return 0;
476         }
477
478         physp = osm_node_get_physp_ptr(node, port);
479         if (!physp)
480                 return 0;
481
482         physp->hop_wf = hop_wf;
483
484         return 0;
485 }
486
487 static void set_default_hop_wf(cl_map_item_t * p_map_item, void *ctx)
488 {
489         osm_switch_t *sw = (osm_switch_t *) p_map_item;
490         int i;
491
492         for (i = 1; i < sw->num_ports; i++) {
493                 osm_physp_t *p = osm_node_get_physp_ptr(sw->p_node, i);
494                 if (p)
495                         p->hop_wf = 1;
496         }
497 }
498
499 static int set_search_ordering_ports(void *ctx, uint64_t guid, char *p)
500 {
501         osm_subn_t *p_subn = ctx;
502         osm_node_t *node = osm_get_node_by_guid(p_subn, cl_hton64(guid));
503         osm_switch_t *sw;
504         uint8_t *search_ordering_ports = NULL;
505         uint8_t port;
506         unsigned int *ports = NULL;
507         const int bpw = sizeof(*ports)*8;
508         int words;
509         int i = 1; /* port 0 maps to port 0 */
510
511         if (!node || !(sw = node->sw)) {
512                 OSM_LOG(&p_subn->p_osm->log, OSM_LOG_VERBOSE,
513                         "switch with guid 0x%016" PRIx64 " is not found\n",
514                         guid);
515                 return 0;
516         }
517
518         if (sw->search_ordering_ports) {
519                 OSM_LOG(&p_subn->p_osm->log, OSM_LOG_VERBOSE,
520                         "switch with guid 0x%016" PRIx64 " already listed\n",
521                         guid);
522                 return 0;
523         }
524
525         search_ordering_ports = malloc(sizeof(*search_ordering_ports)*sw->num_ports);
526         if (!search_ordering_ports) {
527                 OSM_LOG(&p_subn->p_osm->log, OSM_LOG_ERROR,
528                         "ERR 3A07: cannot allocate memory for search_ordering_ports\n");
529                 return -1;
530         }
531         memset(search_ordering_ports, 0, sizeof(*search_ordering_ports)*sw->num_ports);
532
533         /* the ports array is for record keeping of which ports have
534          * been seen */
535         words = (sw->num_ports + bpw - 1)/bpw;
536         ports = malloc(words*sizeof(*ports));
537         if (!ports) {
538                 OSM_LOG(&p_subn->p_osm->log, OSM_LOG_ERROR,
539                         "ERR 3A08: cannot allocate memory for ports\n");
540                 free(search_ordering_ports);
541                 return -1;
542         }
543         memset(ports, 0, words*sizeof(*ports));
544
545         while ((*p != '\0') && (*p != '#')) {
546                 char *e;
547
548                 port = strtoul(p, &e, 0);
549                 if ((p == e) || (port == 0) || (port >= sw->num_ports) ||
550                     !osm_node_get_physp_ptr(node, port)) {
551                         OSM_LOG(&p_subn->p_osm->log, OSM_LOG_VERBOSE,
552                                 "bad port %d specified for guid 0x%016" PRIx64 "\n",
553                                 port, guid);
554                         free(search_ordering_ports);
555                         free(ports);
556                         return 0;
557                 }
558
559                 if (ports[port/bpw] & (1u << (port%bpw))) {
560                         OSM_LOG(&p_subn->p_osm->log, OSM_LOG_VERBOSE,
561                                 "port %d already specified for guid 0x%016" PRIx64 "\n",
562                                 port, guid);
563                         free(search_ordering_ports);
564                         free(ports);
565                         return 0;
566                 }
567
568                 ports[port/bpw] |= (1u << (port%bpw));
569                 search_ordering_ports[i++] = port;
570
571                 p = e;
572                 while (isspace(*p)) {
573                         p++;
574                 }
575         }
576
577         if (i > 1) {
578                 for (port = 1; port < sw->num_ports; port++) {
579                         /* fill out the rest of the search_ordering_ports array
580                          * in sequence using the remaining unspecified
581                          * ports.
582                          */
583                         if (!(ports[port/bpw] & (1u << (port%bpw)))) {
584                                 search_ordering_ports[i++] = port;
585                         }
586                 }
587                 sw->search_ordering_ports = search_ordering_ports;
588         } else {
589                 free(search_ordering_ports);
590         }
591
592         free(ports);
593         return 0;
594 }
595
596 int osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * p_mgr)
597 {
598         uint32_t i;
599         uint32_t iteration_max;
600         cl_qmap_t *p_sw_guid_tbl;
601
602         p_sw_guid_tbl = &p_mgr->p_subn->sw_guid_tbl;
603
604         OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE,
605                 "Starting switches' Min Hop Table Assignment\n");
606
607         /*
608            Set up the weighting factors for the routing.
609          */
610         cl_qmap_apply_func(p_sw_guid_tbl, set_default_hop_wf, NULL);
611         if (p_mgr->p_subn->opt.hop_weights_file) {
612                 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
613                         "Fetching hop weight factor file \'%s\'\n",
614                         p_mgr->p_subn->opt.hop_weights_file);
615                 if (parse_node_map(p_mgr->p_subn->opt.hop_weights_file,
616                                    set_hop_wf, p_mgr)) {
617                         OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A05: "
618                                 "cannot parse hop_weights_file \'%s\'\n",
619                                 p_mgr->p_subn->opt.hop_weights_file);
620                 }
621         }
622
623         /*
624            Set the switch matrices for each switch's own port 0 LID(s)
625            then set the lid matrices for the each switch's leaf nodes.
626          */
627         cl_qmap_apply_func(p_sw_guid_tbl, ucast_mgr_process_hop_0_1, p_mgr);
628
629         /*
630            Get the switch matrices for each switch's neighbors.
631            This process requires a number of iterations equal to
632            the number of switches in the subnet minus 1.
633
634            In each iteration, a switch learns the lid/port/hop
635            information (as contained by a switch's lid matrix) from
636            its immediate neighbors.  After each iteration, a switch
637            (and it's neighbors) know more routing information than
638            it did on the previous iteration.
639            Thus, by repeatedly absorbing the routing information of
640            neighbor switches, every switch eventually learns how to
641            route all LIDs on the subnet.
642
643            Note that there may not be any switches in the subnet if
644            we are in simple p2p configuration.
645          */
646         iteration_max = cl_qmap_count(p_sw_guid_tbl);
647
648         /*
649            If there are switches in the subnet, iterate until the lid
650            matrix has been constructed.  Otherwise, just immediately
651            indicate we're done if no switches exist.
652          */
653         if (iteration_max) {
654                 iteration_max--;
655
656                 /*
657                    we need to find out when the propagation of
658                    hop counts has relaxed. So this global variable
659                    is preset to 0 on each iteration and if
660                    if non of the switches was set will exit the
661                    while loop
662                  */
663                 p_mgr->some_hop_count_set = TRUE;
664                 for (i = 0; (i < iteration_max) && p_mgr->some_hop_count_set;
665                      i++) {
666                         p_mgr->some_hop_count_set = FALSE;
667                         cl_qmap_apply_func(p_sw_guid_tbl,
668                                            ucast_mgr_process_neighbors, p_mgr);
669                 }
670                 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
671                         "Min-hop propagated in %d steps\n", i);
672         }
673
674         return 0;
675 }
676
677 static int ucast_mgr_setup_all_switches(osm_subn_t * p_subn)
678 {
679         osm_switch_t *p_sw;
680         uint16_t lids;
681
682         lids = (uint16_t) cl_ptr_vector_get_size(&p_subn->port_lid_tbl);
683         lids = lids ? lids - 1 : 0;
684
685         for (p_sw = (osm_switch_t *) cl_qmap_head(&p_subn->sw_guid_tbl);
686              p_sw != (osm_switch_t *) cl_qmap_end(&p_subn->sw_guid_tbl);
687              p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) {
688                 if (osm_switch_prepare_path_rebuild(p_sw, lids)) {
689                         OSM_LOG(&p_subn->p_osm->log, OSM_LOG_ERROR, "ERR 3A0B: "
690                                 "cannot setup switch 0x%016" PRIx64 "\n",
691                                 cl_ntoh64(osm_node_get_node_guid
692                                           (p_sw->p_node)));
693                         return -1;
694                 }
695                 if (p_sw->search_ordering_ports) {
696                         free(p_sw->search_ordering_ports);
697                         p_sw->search_ordering_ports = NULL;
698                 }
699         }
700
701         if (p_subn->opt.port_search_ordering_file) {
702                 OSM_LOG(&p_subn->p_osm->log, OSM_LOG_DEBUG,
703                         "Fetching dimension ports file \'%s\'\n",
704                         p_subn->opt.port_search_ordering_file);
705                 if (parse_node_map(p_subn->opt.port_search_ordering_file,
706                                    set_search_ordering_ports, p_subn)) {
707                         OSM_LOG(&p_subn->p_osm->log, OSM_LOG_ERROR, "ERR 3A0F: "
708                                 "cannot parse port_search_ordering_file \'%s\'\n",
709                                 p_subn->opt.port_search_ordering_file);
710                 }
711         }
712
713         return 0;
714 }
715
716 static int add_guid_to_order_list(void *ctx, uint64_t guid, char *p)
717 {
718         osm_ucast_mgr_t *m = ctx;
719         osm_port_t *port = osm_get_port_by_guid(m->p_subn, cl_hton64(guid));
720
721         if (!port) {
722                 OSM_LOG(m->p_log, OSM_LOG_DEBUG,
723                         "port guid not found: 0x%016" PRIx64 "\n", guid);
724                 return 0;
725         }
726
727         if (port->flag) {
728                 OSM_LOG(m->p_log, OSM_LOG_DEBUG,
729                         "port guid specified multiple times 0x%016" PRIx64 "\n",
730                         guid);
731                 return 0;
732         }
733
734         cl_qlist_insert_tail(&m->port_order_list, &port->list_item);
735         port->flag = 1;
736         port->use_scatter =  (m->p_subn->opt.guid_routing_order_no_scatter == TRUE) ? 0 : m->p_subn->opt.scatter_ports;
737
738         return 0;
739 }
740
741 static void add_port_to_order_list(cl_map_item_t * p_map_item, void *ctx)
742 {
743         osm_port_t *port = (osm_port_t *) p_map_item;
744         osm_ucast_mgr_t *m = ctx;
745
746         if (!port->flag) {
747                 port->use_scatter = m->p_subn->opt.scatter_ports;
748                 cl_qlist_insert_tail(&m->port_order_list, &port->list_item);
749         } else
750                 port->flag = 0;
751 }
752
753 static int mark_ignored_port(void *ctx, uint64_t guid, char *p)
754 {
755         osm_ucast_mgr_t *m = ctx;
756         osm_node_t *node = osm_get_node_by_guid(m->p_subn, cl_hton64(guid));
757         osm_physp_t *physp;
758         unsigned port;
759
760         if (!node || !node->sw) {
761                 OSM_LOG(m->p_log, OSM_LOG_DEBUG,
762                         "switch with guid 0x%016" PRIx64 " is not found\n",
763                         guid);
764                 return 0;
765         }
766
767         if (!p || !*p || !(port = strtoul(p, NULL, 0)) ||
768             port >= node->sw->num_ports) {
769                 OSM_LOG(m->p_log, OSM_LOG_DEBUG,
770                         "bad port specified for guid 0x%016" PRIx64 "\n", guid);
771                 return 0;
772         }
773
774         physp = osm_node_get_physp_ptr(node, port);
775         if (!physp)
776                 return 0;
777
778         physp->is_prof_ignored = 1;
779
780         return 0;
781 }
782
783 static void clear_prof_ignore_flag(cl_map_item_t * p_map_item, void *ctx)
784 {
785         osm_switch_t *sw = (osm_switch_t *) p_map_item;
786         int i;
787
788         for (i = 1; i < sw->num_ports; i++) {
789                 osm_physp_t *p = osm_node_get_physp_ptr(sw->p_node, i);
790                 if (p)
791                         p->is_prof_ignored = 0;
792         }
793 }
794
795 static void add_sw_endports_to_order_list(osm_switch_t * sw,
796                                           osm_ucast_mgr_t * m)
797 {
798         osm_port_t *port;
799         osm_physp_t *p;
800         int i;
801
802         for (i = 1; i < sw->num_ports; i++) {
803                 p = osm_node_get_physp_ptr(sw->p_node, i);
804                 if (p && p->p_remote_physp && !p->p_remote_physp->p_node->sw) {
805                         port = osm_get_port_by_guid(m->p_subn,
806                                                     p->p_remote_physp->
807                                                     port_guid);
808                         if (!port || port->flag)
809                                 continue;
810                         cl_qlist_insert_tail(&m->port_order_list,
811                                              &port->list_item);
812                         port->flag = 1;
813                         port->use_scatter = m->p_subn->opt.scatter_ports;
814                 }
815         }
816 }
817
818 static void sw_count_endport_links(osm_switch_t * sw)
819 {
820         osm_physp_t *p;
821         int i;
822
823         sw->endport_links = 0;
824         for (i = 1; i < sw->num_ports; i++) {
825                 p = osm_node_get_physp_ptr(sw->p_node, i);
826                 if (p && p->p_remote_physp && !p->p_remote_physp->p_node->sw)
827                         sw->endport_links++;
828         }
829 }
830
831 static int compar_sw_load(const void *s1, const void *s2)
832 {
833 #define get_sw_endport_links(s) (*(osm_switch_t **)s)->endport_links
834         return get_sw_endport_links(s2) - get_sw_endport_links(s1);
835 }
836
837 static void sort_ports_by_switch_load(osm_ucast_mgr_t * m)
838 {
839         int i, num = cl_qmap_count(&m->p_subn->sw_guid_tbl);
840         void **s = malloc(num * sizeof(*s));
841         if (!s) {
842                 OSM_LOG(m->p_log, OSM_LOG_ERROR, "ERR 3A0C: "
843                         "No memory, skip by switch load sorting.\n");
844                 return;
845         }
846         s[0] = cl_qmap_head(&m->p_subn->sw_guid_tbl);
847         for (i = 1; i < num; i++)
848                 s[i] = cl_qmap_next(s[i - 1]);
849
850         for (i = 0; i < num; i++)
851                 sw_count_endport_links(s[i]);
852
853         qsort(s, num, sizeof(*s), compar_sw_load);
854
855         for (i = 0; i < num; i++)
856                 add_sw_endports_to_order_list(s[i], m);
857         free(s);
858 }
859
860 static int ucast_mgr_build_lfts(osm_ucast_mgr_t * p_mgr)
861 {
862         cl_qlist_init(&p_mgr->port_order_list);
863
864         if (p_mgr->p_subn->opt.guid_routing_order_file) {
865                 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
866                         "Fetching guid routing order file \'%s\'\n",
867                         p_mgr->p_subn->opt.guid_routing_order_file);
868
869                 if (parse_node_map(p_mgr->p_subn->opt.guid_routing_order_file,
870                                    add_guid_to_order_list, p_mgr))
871                         OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A0D: "
872                                 "cannot parse guid routing order file \'%s\'\n",
873                                 p_mgr->p_subn->opt.guid_routing_order_file);
874         }
875         sort_ports_by_switch_load(p_mgr);
876
877         if (p_mgr->p_subn->opt.port_prof_ignore_file) {
878                 cl_qmap_apply_func(&p_mgr->p_subn->sw_guid_tbl,
879                                    clear_prof_ignore_flag, NULL);
880                 if (parse_node_map(p_mgr->p_subn->opt.port_prof_ignore_file,
881                                    mark_ignored_port, p_mgr)) {
882                         OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A0E: "
883                                 "cannot parse port prof ignore file \'%s\'\n",
884                                 p_mgr->p_subn->opt.port_prof_ignore_file);
885                 }
886         }
887
888         cl_qmap_apply_func(&p_mgr->p_subn->port_guid_tbl,
889                            add_port_to_order_list, p_mgr);
890
891         cl_qmap_apply_func(&p_mgr->p_subn->sw_guid_tbl, ucast_mgr_process_tbl,
892                            p_mgr);
893
894         cl_qlist_remove_all(&p_mgr->port_order_list);
895
896         return 0;
897 }
898
899 static void ucast_mgr_set_fwd_top(IN cl_map_item_t * p_map_item,
900                                   IN void *cxt)
901 {
902         osm_ucast_mgr_t *p_mgr = cxt;
903         osm_switch_t * p_sw = (osm_switch_t *) p_map_item;
904         osm_node_t *p_node;
905         osm_physp_t *p_physp;
906         osm_dr_path_t *p_path;
907         osm_madw_context_t context;
908         ib_api_status_t status;
909         ib_switch_info_t si;
910         boolean_t set_swinfo_require = FALSE;
911         uint16_t lin_top;
912         uint8_t life_state;
913
914         CL_ASSERT(p_mgr);
915
916         OSM_LOG_ENTER(p_mgr->p_log);
917
918         CL_ASSERT(p_sw && p_sw->max_lid_ho);
919
920         p_node = p_sw->p_node;
921
922         CL_ASSERT(p_node);
923
924         if (p_mgr->max_lid < p_sw->max_lid_ho)
925                 p_mgr->max_lid = p_sw->max_lid_ho;
926
927         p_physp = osm_node_get_physp_ptr(p_node, 0);
928
929         CL_ASSERT(p_physp);
930
931         p_path = osm_physp_get_dr_path_ptr(p_physp);
932
933         /*
934            Set the top of the unicast forwarding table.
935          */
936         si = p_sw->switch_info;
937         lin_top = cl_hton16(p_sw->max_lid_ho);
938         if (lin_top != si.lin_top) {
939                 set_swinfo_require = TRUE;
940                 si.lin_top = lin_top;
941                 context.si_context.lft_top_change = TRUE;
942         } else
943                 context.si_context.lft_top_change = FALSE;
944
945         life_state = si.life_state;
946         ib_switch_info_set_life_time(&si, p_mgr->p_subn->opt.packet_life_time);
947
948         if (life_state != si.life_state)
949                 set_swinfo_require = TRUE;
950
951         if (set_swinfo_require) {
952                 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
953                         "Setting switch FT top to LID %u\n", p_sw->max_lid_ho);
954
955                 context.si_context.light_sweep = FALSE;
956                 context.si_context.node_guid = osm_node_get_node_guid(p_node);
957                 context.si_context.set_method = TRUE;
958
959                 status = osm_req_set(p_mgr->sm, p_path, (uint8_t *) & si,
960                                      sizeof(si), IB_MAD_ATTR_SWITCH_INFO,
961                                      0, FALSE,
962                                      ib_port_info_get_m_key(&p_physp->port_info),
963                                      CL_DISP_MSGID_NONE, &context);
964
965                 if (status != IB_SUCCESS)
966                         OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A06: "
967                                 "Sending SwitchInfo attribute failed (%s)\n",
968                                 ib_get_err_str(status));
969         }
970
971         OSM_LOG_EXIT(p_mgr->p_log);
972 }
973
974 static int set_lft_block(IN osm_switch_t *p_sw, IN osm_ucast_mgr_t *p_mgr,
975                          IN uint16_t block_id_ho)
976 {
977         osm_madw_context_t context;
978         osm_dr_path_t *p_path;
979         osm_physp_t *p_physp;
980         ib_api_status_t status;
981
982         /*
983            Send linear forwarding table blocks to the switch
984            as long as the switch indicates it has blocks needing
985            configuration.
986          */
987         if (!p_sw->new_lft) {
988                 /* any routing should provide the new_lft */
989                 CL_ASSERT(p_mgr->p_subn->opt.use_ucast_cache &&
990                           p_mgr->cache_valid && !p_sw->need_update);
991                 return -1;
992         }
993
994         p_physp = osm_node_get_physp_ptr(p_sw->p_node, 0);
995         if (!p_physp)
996                 return -1;
997
998         p_path = osm_physp_get_dr_path_ptr(p_physp);
999
1000         context.lft_context.node_guid = osm_node_get_node_guid(p_sw->p_node);
1001         context.lft_context.set_method = TRUE;
1002
1003         if (!p_sw->need_update && !p_mgr->p_subn->need_update &&
1004             !memcmp(p_sw->new_lft + block_id_ho * IB_SMP_DATA_SIZE,
1005                     p_sw->lft + block_id_ho * IB_SMP_DATA_SIZE,
1006                     IB_SMP_DATA_SIZE))
1007                 return 0;
1008
1009         /*
1010          * Zero the stored LFT block, so in case the MAD will end up
1011          * with error, we will resend it in the next sweep.
1012          */
1013         memset(p_sw->lft + block_id_ho * IB_SMP_DATA_SIZE, 0,
1014                IB_SMP_DATA_SIZE);
1015
1016         OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
1017                 "Writing FT block %u to switch 0x%" PRIx64 "\n", block_id_ho,
1018                 cl_ntoh64(context.lft_context.node_guid));
1019
1020         status = osm_req_set(p_mgr->sm, p_path,
1021                              p_sw->new_lft + block_id_ho * IB_SMP_DATA_SIZE,
1022                              IB_SMP_DATA_SIZE, IB_MAD_ATTR_LIN_FWD_TBL,
1023                              cl_hton32(block_id_ho), FALSE,
1024                              ib_port_info_get_m_key(&p_physp->port_info),
1025                              CL_DISP_MSGID_NONE, &context);
1026
1027         if (status != IB_SUCCESS) {
1028                 OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A10: "
1029                         "Sending linear fwd. tbl. block failed (%s)\n",
1030                         ib_get_err_str(status));
1031                 return -1;
1032         }
1033
1034         return 0;
1035 }
1036
1037 static void ucast_mgr_pipeline_fwd_tbl(osm_ucast_mgr_t * p_mgr)
1038 {
1039         cl_qmap_t *tbl;
1040         cl_map_item_t *item;
1041         unsigned i, max_block = p_mgr->max_lid / IB_SMP_DATA_SIZE + 1;
1042
1043         tbl = &p_mgr->p_subn->sw_guid_tbl;
1044         for (i = 0; i < max_block; i++)
1045                 for (item = cl_qmap_head(tbl); item != cl_qmap_end(tbl);
1046                      item = cl_qmap_next(item))
1047                         set_lft_block((osm_switch_t *)item, p_mgr, i);
1048 }
1049
1050 void osm_ucast_mgr_set_fwd_tables(osm_ucast_mgr_t * p_mgr)
1051 {
1052         p_mgr->max_lid = 0;
1053
1054         cl_qmap_apply_func(&p_mgr->p_subn->sw_guid_tbl, ucast_mgr_set_fwd_top,
1055                            p_mgr);
1056
1057         ucast_mgr_pipeline_fwd_tbl(p_mgr);
1058 }
1059
1060 static int ucast_mgr_route(struct osm_routing_engine *r, osm_opensm_t * osm)
1061 {
1062         int ret;
1063
1064         OSM_LOG(&osm->log, OSM_LOG_VERBOSE,
1065                 "building routing with \'%s\' routing algorithm...\n", r->name);
1066
1067         /* Set the before each lft build to keep the routes in place between sweeps */
1068         if (osm->subn.opt.scatter_ports)
1069                 srandom(osm->subn.opt.scatter_ports);
1070
1071         if (!r->build_lid_matrices ||
1072             (ret = r->build_lid_matrices(r->context)) > 0)
1073                 ret = osm_ucast_mgr_build_lid_matrices(&osm->sm.ucast_mgr);
1074
1075         if (ret < 0) {
1076                 OSM_LOG(&osm->log, OSM_LOG_ERROR,
1077                         "%s: cannot build lid matrices\n", r->name);
1078                 return ret;
1079         }
1080
1081         if (!r->ucast_build_fwd_tables ||
1082             (ret = r->ucast_build_fwd_tables(r->context)) > 0)
1083                 ret = ucast_mgr_build_lfts(&osm->sm.ucast_mgr);
1084
1085         if (ret < 0) {
1086                 OSM_LOG(&osm->log, OSM_LOG_ERROR,
1087                         "%s: cannot build fwd tables\n", r->name);
1088                 return ret;
1089         }
1090
1091         osm->routing_engine_used = r;
1092
1093         osm_ucast_mgr_set_fwd_tables(&osm->sm.ucast_mgr);
1094
1095         return 0;
1096 }
1097
1098 int osm_ucast_mgr_process(IN osm_ucast_mgr_t * p_mgr)
1099 {
1100         osm_opensm_t *p_osm;
1101         struct osm_routing_engine *p_routing_eng;
1102         cl_qmap_t *p_sw_guid_tbl;
1103         int failed = 0;
1104
1105         OSM_LOG_ENTER(p_mgr->p_log);
1106
1107         p_sw_guid_tbl = &p_mgr->p_subn->sw_guid_tbl;
1108         p_osm = p_mgr->p_subn->p_osm;
1109         p_routing_eng = p_osm->routing_engine_list;
1110
1111         CL_PLOCK_EXCL_ACQUIRE(p_mgr->p_lock);
1112
1113         /*
1114            If there are no switches in the subnet, we are done.
1115          */
1116         if (cl_qmap_count(p_sw_guid_tbl) == 0 ||
1117             ucast_mgr_setup_all_switches(p_mgr->p_subn) < 0)
1118                 goto Exit;
1119
1120         failed = -1;
1121         p_osm->routing_engine_used = NULL;
1122         while (p_routing_eng) {
1123                 failed = ucast_mgr_route(p_routing_eng, p_osm);
1124                 if (!failed)
1125                         break;
1126                 p_routing_eng = p_routing_eng->next;
1127         }
1128
1129         if (!p_osm->routing_engine_used &&
1130             p_osm->no_fallback_routing_engine != TRUE) {
1131                 /* If configured routing algorithm failed, use default MinHop */
1132                 failed = ucast_mgr_route(p_osm->default_routing_engine, p_osm);
1133         }
1134
1135         if (p_osm->routing_engine_used) {
1136                 OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
1137                         "%s tables configured on all switches\n",
1138                         osm_routing_engine_type_str(p_osm->
1139                                                     routing_engine_used->type));
1140
1141                 if (p_mgr->p_subn->opt.use_ucast_cache)
1142                         p_mgr->cache_valid = TRUE;
1143         } else {
1144                 p_mgr->p_subn->subnet_initialization_error = TRUE;
1145                 OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
1146                         "No routing engine able to successfully configure "
1147                         " switch tables on current fabric\n");
1148         }
1149 Exit:
1150         CL_PLOCK_RELEASE(p_mgr->p_lock);
1151         OSM_LOG_EXIT(p_mgr->p_log);
1152         return failed;
1153 }
1154
1155 static int ucast_build_lid_matrices(void *context)
1156 {
1157         return osm_ucast_mgr_build_lid_matrices(context);
1158 }
1159
1160 static int ucast_build_lfts(void *context)
1161 {
1162         return ucast_mgr_build_lfts(context);
1163 }
1164
1165 int osm_ucast_minhop_setup(struct osm_routing_engine *r, osm_opensm_t * osm)
1166 {
1167         r->context = &osm->sm.ucast_mgr;
1168         r->build_lid_matrices = ucast_build_lid_matrices;
1169         r->ucast_build_fwd_tables = ucast_build_lfts;
1170         return 0;
1171 }
1172
1173 static int ucast_dor_build_lfts(void *context)
1174 {
1175         osm_ucast_mgr_t *mgr = context;
1176         int ret;
1177
1178         mgr->is_dor = 1;
1179         ret = ucast_mgr_build_lfts(mgr);
1180         mgr->is_dor = 0;
1181
1182         return ret;
1183 }
1184
1185 int osm_ucast_dor_setup(struct osm_routing_engine *r, osm_opensm_t * osm)
1186 {
1187         r->context = &osm->sm.ucast_mgr;
1188         r->build_lid_matrices = ucast_build_lid_matrices;
1189         r->ucast_build_fwd_tables = ucast_dor_build_lfts;
1190         return 0;
1191 }
1192
/*
 * No-op lid-matrix builder: ignores its context and always returns 0.
 * NOTE(review): presumably meant for routing engines that do not need
 * lid matrices -- confirm against the callers.
 */
int ucast_dummy_build_lid_matrices(void *context)
{
        (void)context;

        return 0;
}