2 * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved.
4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5 * Copyright (c) 2007 Simula Research Laboratory. All rights reserved.
6 * Copyright (c) 2007 Silicon Graphics Inc. All rights reserved.
7 * Copyright (c) 2008,2009 System Fabric Works, Inc. All rights reserved.
8 * Copyright (c) 2009 HNR Consulting. All rights reserved.
9 * Copyright (c) 2009-2011 ZIH, TU Dresden, Federal Republic of Germany. All rights reserved.
11 * This software is available to you under a choice of one of two
12 * licenses. You may choose to be licensed under the terms of the GNU
13 * General Public License (GPL) Version 2, available from the file
14 * COPYING in the main directory of this source tree, or the
15 * OpenIB.org BSD license below:
17 * Redistribution and use in source and binary forms, with or
18 * without modification, are permitted provided that the following
21 * - Redistributions of source code must retain the above
22 * copyright notice, this list of conditions and the following
25 * - Redistributions in binary form must reproduce the above
26 * copyright notice, this list of conditions and the following
27 * disclaimer in the documentation and/or other materials
28 * provided with the distribution.
30 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
31 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
33 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
34 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
35 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
36 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
43 * Implementation of LASH algorithm Calculation functions
48 #endif /* HAVE_CONFIG_H */
53 #include <complib/cl_debug.h>
54 #include <complib/cl_qmap.h>
55 #include <opensm/osm_file_ids.h>
56 #define FILE_ID OSM_FILE_UCAST_LASH_C
57 #include <opensm/osm_switch.h>
58 #include <opensm/osm_opensm.h>
59 #include <opensm/osm_log.h>
60 #include <opensm/osm_mesh.h>
61 #include <opensm/osm_ucast_lash.h>
63 typedef struct _reachable_dest {
65 struct _reachable_dest *next;
68 static void connect_switches(lash_t * p_lash, int sw1, int sw2, int phy_port_1)
70 osm_log_t *p_log = &p_lash->p_osm->log;
71 unsigned num = p_lash->switches[sw1]->node->num_links;
72 switch_t *s1 = p_lash->switches[sw1];
73 mesh_node_t *node = s1->node;
79 * if doing mesh analysis:
80 * - do not consider connections to self
81 * - collapse multiple connections between
82 * pair of switches to a single logical link
84 if (p_lash->p_osm->subn.opt.do_mesh_analysis) {
88 /* see if we are already linked to sw2 */
89 for (i = 0; i < num; i++) {
92 if (node->links[i]->switch_id == sw2) {
93 l->ports[l->num_ports++] = phy_port_1;
102 l->ports[l->num_ports++] = phy_port_1;
104 s2 = p_lash->switches[sw2];
105 for (i = 0; i < s2->node->num_links; i++) {
106 if (s2->node->links[i]->switch_id == sw1) {
107 s2->node->links[i]->link_id = num;
115 OSM_LOG(p_log, OSM_LOG_VERBOSE,
116 "LASH connect: %d, %d, %d\n", sw1, sw2, phy_port_1);
119 static osm_switch_t *get_osm_switch_from_port(const osm_port_t * port)
121 osm_physp_t *p = port->p_physp;
123 return p->p_node->sw;
124 else if (p->p_remote_physp && p->p_remote_physp->p_node->sw)
125 return p->p_remote_physp->p_node->sw;
129 static int cycle_exists(cdg_vertex_t * start, cdg_vertex_t * current,
130 cdg_vertex_t * prev, int visit_num)
132 int i, new_visit_num;
135 if (current != NULL && current->visiting_number > 0) {
136 if (visit_num > current->visiting_number && current->seen == 0) {
140 if (current == NULL) {
142 CL_ASSERT(prev == NULL);
145 current->visiting_number = visit_num;
148 prev->next = current;
149 CL_ASSERT(prev->to == current->from);
150 CL_ASSERT(prev->visiting_number > 0);
153 new_visit_num = visit_num + 1;
155 for (i = 0; i < current->num_deps; i++) {
157 cycle_exists(start, current->deps[i].v, current,
159 if (cycle_found == 1)
160 i = current->num_deps;
171 static inline int get_next_switch(lash_t *p_lash, int sw, int link)
173 return p_lash->switches[sw]->node->links[link]->switch_id;
176 static void remove_semipermanent_depend_for_sp(lash_t * p_lash, int sw,
177 int dest_switch, int lane)
179 switch_t **switches = p_lash->switches;
180 cdg_vertex_t ****cdg_vertex_matrix = p_lash->cdg_vertex_matrix;
181 int i_next_switch, output_link, i, next_link, i_next_next_switch,
184 int __attribute__((unused)) found;
186 output_link = switches[sw]->routing_table[dest_switch].out_link;
187 i_next_switch = get_next_switch(p_lash, sw, output_link);
189 while (sw != dest_switch) {
190 v = cdg_vertex_matrix[lane][sw][i_next_switch];
191 CL_ASSERT(v != NULL);
193 if (v->num_using_vertex == 1) {
195 cdg_vertex_matrix[lane][sw][i_next_switch] = NULL;
199 v->num_using_vertex--;
200 if (i_next_switch != dest_switch) {
202 switches[i_next_switch]->routing_table[dest_switch].out_link;
203 i_next_next_switch = get_next_switch(p_lash, i_next_switch, next_link);
206 for (i = 0; i < v->num_deps; i++)
208 cdg_vertex_matrix[lane][i_next_switch]
209 [i_next_next_switch]) {
216 if (v->deps[depend].num_used == 1) {
218 i < v->num_deps - 1; i++) {
219 v->deps[i].v = v->deps[i + 1].v;
220 v->deps[i].num_used =
221 v->deps[i + 1].num_used;
226 v->deps[depend].num_used--;
231 output_link = switches[sw]->routing_table[dest_switch].out_link;
233 if (sw != dest_switch)
234 i_next_switch = get_next_switch(p_lash, sw, output_link);
238 inline static void enqueue(cl_list_t * bfsq, switch_t * sw)
240 CL_ASSERT(sw->q_state == UNQUEUED);
241 sw->q_state = Q_MEMBER;
242 cl_list_insert_tail(bfsq, sw);
245 inline static void dequeue(cl_list_t * bfsq, switch_t ** sw)
247 *sw = (switch_t *) cl_list_remove_head(bfsq);
248 CL_ASSERT((*sw)->q_state == Q_MEMBER);
249 (*sw)->q_state = MST_MEMBER;
252 static int get_phys_connection(switch_t *sw, int switch_to)
256 for (i = 0; i < sw->node->num_links; i++)
257 if (sw->node->links[i]->switch_id == switch_to)
262 static void shortest_path(lash_t * p_lash, int ir)
264 switch_t **switches = p_lash->switches, *sw, *swi;
268 cl_list_construct(&bfsq);
269 cl_list_init(&bfsq, 20);
271 enqueue(&bfsq, switches[ir]);
273 while (!cl_is_list_empty(&bfsq)) {
275 for (i = 0; i < sw->node->num_links; i++) {
276 swi = switches[sw->node->links[i]->switch_id];
277 if (swi->q_state == UNQUEUED) {
279 sw->dij_channels[sw->used_channels++] = swi->id;
284 cl_list_destroy(&bfsq);
287 static int generate_routing_func_for_mst(lash_t * p_lash, int sw_id,
288 reachable_dest_t ** destinations)
291 switch_t *sw = p_lash->switches[sw_id];
292 int num_channels = sw->used_channels;
293 reachable_dest_t *dest, *i_dest, *concat_dest = NULL, *prev;
295 for (i = 0; i < num_channels; i++) {
296 next_switch = sw->dij_channels[i];
297 if (generate_routing_func_for_mst(p_lash, next_switch, &dest))
303 while (i_dest != NULL) {
304 if (sw->routing_table[i_dest->switch_id].out_link ==
306 sw->routing_table[i_dest->switch_id].out_link =
307 get_phys_connection(sw, next_switch);
310 i_dest = i_dest->next;
313 CL_ASSERT(prev->next == NULL);
314 prev->next = concat_dest;
318 i_dest = (reachable_dest_t *) malloc(sizeof(reachable_dest_t));
321 i_dest->switch_id = sw->id;
322 i_dest->next = concat_dest;
323 *destinations = i_dest;
327 static int generate_cdg_for_sp(lash_t * p_lash, int sw, int dest_switch,
330 unsigned num_switches = p_lash->num_switches;
331 switch_t **switches = p_lash->switches;
332 cdg_vertex_t ****cdg_vertex_matrix = p_lash->cdg_vertex_matrix;
333 int next_switch, output_link, j, exists;
334 cdg_vertex_t *v, *prev = NULL;
336 output_link = switches[sw]->routing_table[dest_switch].out_link;
337 next_switch = get_next_switch(p_lash, sw, output_link);
339 while (sw != dest_switch) {
341 if (cdg_vertex_matrix[lane][sw][next_switch] == NULL) {
342 v = calloc(1, sizeof(*v) + (num_switches - 1) * sizeof(v->deps[0]));
348 cdg_vertex_matrix[lane][sw][next_switch] = v;
350 v = cdg_vertex_matrix[lane][sw][next_switch];
352 v->num_using_vertex++;
357 for (j = 0; j < prev->num_deps; j++)
358 if (prev->deps[j].v == v) {
360 prev->deps[j].num_used++;
364 prev->deps[prev->num_deps].v = v;
365 prev->deps[prev->num_deps].num_used++;
368 CL_ASSERT(prev->num_deps < (int)num_switches);
371 prev->num_temp_depend++;
377 output_link = switches[sw]->routing_table[dest_switch].out_link;
379 if (sw != dest_switch) {
380 CL_ASSERT(output_link != NONE);
381 next_switch = get_next_switch(p_lash, sw, output_link);
389 static void set_temp_depend_to_permanent_for_sp(lash_t * p_lash, int sw,
390 int dest_switch, int lane)
392 switch_t **switches = p_lash->switches;
393 cdg_vertex_t ****cdg_vertex_matrix = p_lash->cdg_vertex_matrix;
394 int next_switch, output_link;
397 output_link = switches[sw]->routing_table[dest_switch].out_link;
398 next_switch = get_next_switch(p_lash, sw, output_link);
400 while (sw != dest_switch) {
401 v = cdg_vertex_matrix[lane][sw][next_switch];
402 CL_ASSERT(v != NULL);
407 v->num_temp_depend = 0;
410 output_link = switches[sw]->routing_table[dest_switch].out_link;
412 if (sw != dest_switch)
413 next_switch = get_next_switch(p_lash, sw, output_link);
418 static void remove_temp_depend_for_sp(lash_t * p_lash, int sw, int dest_switch,
421 switch_t **switches = p_lash->switches;
422 cdg_vertex_t ****cdg_vertex_matrix = p_lash->cdg_vertex_matrix;
423 int next_switch, output_link, i;
426 output_link = switches[sw]->routing_table[dest_switch].out_link;
427 next_switch = get_next_switch(p_lash, sw, output_link);
429 while (sw != dest_switch) {
430 v = cdg_vertex_matrix[lane][sw][next_switch];
431 CL_ASSERT(v != NULL);
434 cdg_vertex_matrix[lane][sw][next_switch] = NULL;
437 CL_ASSERT(v->num_temp_depend <= v->num_deps);
438 v->num_deps = v->num_deps - v->num_temp_depend;
439 v->num_temp_depend = 0;
440 v->num_using_vertex--;
442 for (i = v->num_deps; i < p_lash->num_switches - 1; i++)
443 v->deps[i].num_used = 0;
447 output_link = switches[sw]->routing_table[dest_switch].out_link;
449 if (sw != dest_switch)
450 next_switch = get_next_switch(p_lash, sw, output_link);
455 static int balance_virtual_lanes(lash_t * p_lash, unsigned lanes_needed)
457 unsigned num_switches = p_lash->num_switches;
458 cdg_vertex_t ****cdg_vertex_matrix = p_lash->cdg_vertex_matrix;
459 int *num_mst_in_lane = p_lash->num_mst_in_lane;
460 int ***virtual_location = p_lash->virtual_location;
461 int min_filled_lane, max_filled_lane, trials;
462 int old_min_filled_lane, old_max_filled_lane, new_num_min_lane,
465 int src, dest, start, next_switch, output_link;
466 int next_switch2, output_link2;
467 int stop = 0, cycle_found;
469 unsigned start_vl = p_lash->p_osm->subn.opt.lash_start_vl;
472 min_filled_lane = lanes_needed - 1;
474 trials = num_mst_in_lane[max_filled_lane];
475 if (lanes_needed == 1)
479 src = abs(rand()) % (num_switches);
480 dest = abs(rand()) % (num_switches);
482 while (virtual_location[src][dest][max_filled_lane] != 1) {
484 if (dest == num_switches - 1)
490 && virtual_location[src][dest][max_filled_lane]
492 if (dest == num_switches - 1)
498 if (virtual_location[src][dest][max_filled_lane] != 1) {
499 if (src == num_switches - 1)
506 if (generate_cdg_for_sp(p_lash, src, dest, min_filled_lane) ||
507 generate_cdg_for_sp(p_lash, dest, src, min_filled_lane))
510 output_link = p_lash->switches[src]->routing_table[dest].out_link;
511 next_switch = get_next_switch(p_lash, src, output_link);
513 output_link2 = p_lash->switches[dest]->routing_table[src].out_link;
514 next_switch2 = get_next_switch(p_lash, dest, output_link2);
516 CL_ASSERT(cdg_vertex_matrix[min_filled_lane][src][next_switch] != NULL);
517 CL_ASSERT(cdg_vertex_matrix[min_filled_lane][dest][next_switch2] != NULL);
520 cycle_exists(cdg_vertex_matrix[min_filled_lane][src][next_switch], NULL, NULL,
523 cycle_exists(cdg_vertex_matrix[min_filled_lane][dest][next_switch2], NULL, NULL,
526 for (i = 0; i < num_switches; i++)
527 for (j = 0; j < num_switches; j++)
528 if (cdg_vertex_matrix[min_filled_lane][i][j] != NULL) {
529 cdg_vertex_matrix[min_filled_lane][i][j]->visiting_number =
531 cdg_vertex_matrix[min_filled_lane][i][j]->seen = 0;
534 if (cycle_found == 1 || cycle_found2 == 1) {
535 remove_temp_depend_for_sp(p_lash, src, dest, min_filled_lane);
536 remove_temp_depend_for_sp(p_lash, dest, src, min_filled_lane);
538 virtual_location[src][dest][max_filled_lane] = 2;
539 virtual_location[dest][src][max_filled_lane] = 2;
543 set_temp_depend_to_permanent_for_sp(p_lash, src, dest, min_filled_lane);
544 set_temp_depend_to_permanent_for_sp(p_lash, dest, src, min_filled_lane);
546 num_mst_in_lane[max_filled_lane]--;
547 num_mst_in_lane[max_filled_lane]--;
548 num_mst_in_lane[min_filled_lane]++;
549 num_mst_in_lane[min_filled_lane]++;
551 remove_semipermanent_depend_for_sp(p_lash, src, dest, max_filled_lane);
552 remove_semipermanent_depend_for_sp(p_lash, dest, src, max_filled_lane);
553 virtual_location[src][dest][max_filled_lane] = 0;
554 virtual_location[dest][src][max_filled_lane] = 0;
555 virtual_location[src][dest][min_filled_lane] = 1;
556 virtual_location[dest][src][min_filled_lane] = 1;
557 p_lash->switches[src]->routing_table[dest].lane = min_filled_lane + start_vl;
558 p_lash->switches[dest]->routing_table[src].lane = min_filled_lane + start_vl;
564 if (num_mst_in_lane[max_filled_lane] - num_mst_in_lane[min_filled_lane] <
565 p_lash->balance_limit)
569 old_min_filled_lane = min_filled_lane;
570 old_max_filled_lane = max_filled_lane;
572 new_num_min_lane = MAX_INT;
573 new_num_max_lane = 0;
575 for (i = 0; i < lanes_needed; i++) {
577 if (num_mst_in_lane[i] < new_num_min_lane) {
578 new_num_min_lane = num_mst_in_lane[i];
582 if (num_mst_in_lane[i] > new_num_max_lane) {
583 new_num_max_lane = num_mst_in_lane[i];
588 if (old_min_filled_lane != min_filled_lane) {
589 trials = num_mst_in_lane[max_filled_lane];
590 for (i = 0; i < num_switches; i++)
591 for (j = 0; j < num_switches; j++)
592 if (virtual_location[i][j][max_filled_lane] == 2)
593 virtual_location[i][j][max_filled_lane] = 1;
596 if (old_max_filled_lane != max_filled_lane) {
597 trials = num_mst_in_lane[max_filled_lane];
598 for (i = 0; i < num_switches; i++)
599 for (j = 0; j < num_switches; j++)
600 if (virtual_location[i][j][old_max_filled_lane] == 2)
601 virtual_location[i][j][old_max_filled_lane] = 1;
607 static switch_t *switch_create(lash_t * p_lash, unsigned id, osm_switch_t * p_sw)
609 unsigned num_switches = p_lash->num_switches;
610 unsigned num_ports = p_sw->num_ports;
614 sw = malloc(sizeof(*sw) + num_switches * sizeof(sw->routing_table[0]));
618 memset(sw, 0, sizeof(*sw));
619 for (i = 0; i < num_switches; i++) {
620 sw->routing_table[i].out_link = NONE;
621 sw->routing_table[i].lane = NONE;
625 sw->dij_channels = malloc(num_ports * sizeof(int));
626 if (!sw->dij_channels) {
634 if (osm_mesh_node_create(p_lash, sw)) {
635 free(sw->dij_channels);
643 static void switch_delete(lash_t *p_lash, switch_t * sw)
645 if (sw->dij_channels)
646 free(sw->dij_channels);
650 static void delete_mesh_switches(lash_t *p_lash)
652 if (p_lash->switches) {
654 for (id = 0; ((int)id) < p_lash->num_switches; id++)
655 if (p_lash->switches[id])
656 osm_mesh_node_delete(p_lash,
657 p_lash->switches[id]);
661 static void free_lash_structures(lash_t * p_lash)
663 unsigned int i, j, k;
664 unsigned num_switches = p_lash->num_switches;
665 osm_log_t *p_log = &p_lash->p_osm->log;
667 OSM_LOG_ENTER(p_log);
669 delete_mesh_switches(p_lash);
671 /* free cdg_vertex_matrix */
672 for (i = 0; i < p_lash->vl_min; i++) {
673 for (j = 0; j < num_switches; j++) {
674 for (k = 0; k < num_switches; k++)
675 if (p_lash->cdg_vertex_matrix[i][j][k])
676 free(p_lash->cdg_vertex_matrix[i][j][k]);
677 if (p_lash->cdg_vertex_matrix[i][j])
678 free(p_lash->cdg_vertex_matrix[i][j]);
680 if (p_lash->cdg_vertex_matrix[i])
681 free(p_lash->cdg_vertex_matrix[i]);
684 if (p_lash->cdg_vertex_matrix)
685 free(p_lash->cdg_vertex_matrix);
687 /* free virtual_location */
688 for (i = 0; i < num_switches; i++) {
689 for (j = 0; j < num_switches; j++) {
690 if (p_lash->virtual_location[i][j])
691 free(p_lash->virtual_location[i][j]);
693 if (p_lash->virtual_location[i])
694 free(p_lash->virtual_location[i]);
696 if (p_lash->virtual_location)
697 free(p_lash->virtual_location);
702 static int init_lash_structures(lash_t * p_lash)
704 unsigned vl_min = p_lash->vl_min;
705 unsigned num_switches = p_lash->num_switches;
706 osm_log_t *p_log = &p_lash->p_osm->log;
708 unsigned int i, j, k;
710 OSM_LOG_ENTER(p_log);
712 /* initialise cdg_vertex_matrix[vl_min][num_switches][num_switches] */
713 p_lash->cdg_vertex_matrix =
714 (cdg_vertex_t ****) malloc(vl_min * sizeof(cdg_vertex_t ***));
715 if (p_lash->cdg_vertex_matrix == NULL)
717 for (i = 0; i < vl_min; i++) {
718 p_lash->cdg_vertex_matrix[i] =
719 (cdg_vertex_t ***) malloc(num_switches *
720 sizeof(cdg_vertex_t **));
722 if (p_lash->cdg_vertex_matrix[i] == NULL)
726 for (i = 0; i < vl_min; i++) {
727 for (j = 0; j < num_switches; j++) {
728 p_lash->cdg_vertex_matrix[i][j] =
729 (cdg_vertex_t **) malloc(num_switches *
730 sizeof(cdg_vertex_t *));
731 if (p_lash->cdg_vertex_matrix[i][j] == NULL)
734 for (k = 0; k < num_switches; k++)
735 p_lash->cdg_vertex_matrix[i][j][k] = NULL;
740 * initialise virtual_location[num_switches][num_switches][num_layers],
743 p_lash->virtual_location =
744 (int ***)malloc(num_switches * sizeof(int ***));
745 if (p_lash->virtual_location == NULL)
748 for (i = 0; i < num_switches; i++) {
749 p_lash->virtual_location[i] =
750 (int **)malloc(num_switches * sizeof(int **));
751 if (p_lash->virtual_location[i] == NULL)
755 for (i = 0; i < num_switches; i++) {
756 for (j = 0; j < num_switches; j++) {
757 p_lash->virtual_location[i][j] =
758 (int *)malloc(vl_min * sizeof(int *));
759 if (p_lash->virtual_location[i][j] == NULL)
761 for (k = 0; k < vl_min; k++)
762 p_lash->virtual_location[i][j][k] = 0;
766 /* initialise num_mst_in_lane[IB_MAX_NUM_VLS], default 0 */
767 memset(p_lash->num_mst_in_lane, 0,
768 IB_MAX_NUM_VLS * sizeof(p_lash->num_mst_in_lane[0]));
774 OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D01: "
775 "Could not allocate required memory for LASH errno %d, errno %d for lack of memory\n",
783 static int lash_core(lash_t * p_lash)
785 osm_log_t *p_log = &p_lash->p_osm->log;
786 unsigned num_switches = p_lash->num_switches;
787 switch_t **switches = p_lash->switches;
788 unsigned lanes_needed = 1;
789 unsigned int i, j, k, dest_switch = 0;
790 reachable_dest_t *dests, *idest;
793 int stop = 0, output_link, i_next_switch;
794 int output_link2, i_next_switch2;
795 int cycle_found2 = 0;
797 int *switch_bitmap = NULL; /* Bitmap to check if we have processed this pair */
798 unsigned start_vl = p_lash->p_osm->subn.opt.lash_start_vl;
800 OSM_LOG_ENTER(p_log);
802 if (p_lash->p_osm->subn.opt.do_mesh_analysis && osm_do_mesh_analysis(p_lash)) {
803 OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D05: Mesh analysis failed\n");
807 for (i = 0; i < num_switches; i++) {
809 shortest_path(p_lash, i);
810 if (generate_routing_func_for_mst(p_lash, i, &dests)) {
811 OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D06: "
812 "generate_routing_func_for_mst failed\n");
817 while (idest != NULL) {
823 for (j = 0; j < num_switches; j++) {
824 switches[j]->used_channels = 0;
825 switches[j]->q_state = UNQUEUED;
829 switch_bitmap = calloc(num_switches * num_switches, sizeof(int));
830 if (!switch_bitmap) {
831 OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D04: "
832 "Failed allocating switch_bitmap - out of memory\n");
836 for (i = 0; i < num_switches; i++) {
837 for (dest_switch = 0; dest_switch < num_switches; dest_switch++)
838 if (dest_switch != i && switch_bitmap[i * num_switches + dest_switch] == 0) {
841 while (v_lane < lanes_needed && stop == 0) {
842 if (generate_cdg_for_sp(p_lash, i, dest_switch, v_lane) ||
843 generate_cdg_for_sp(p_lash, dest_switch, i, v_lane)) {
844 OSM_LOG(p_log, OSM_LOG_ERROR,
845 "ERR 4D07: generate_cdg_for_sp failed\n");
850 switches[i]->routing_table[dest_switch].out_link;
852 switches[dest_switch]->routing_table[i].out_link;
854 i_next_switch = get_next_switch(p_lash, i, output_link);
855 i_next_switch2 = get_next_switch(p_lash, dest_switch, output_link2);
858 cdg_vertex_matrix[v_lane][i][i_next_switch] !=
861 cdg_vertex_matrix[v_lane][dest_switch]
862 [i_next_switch2] != NULL);
865 cycle_exists(p_lash->
866 cdg_vertex_matrix[v_lane][i]
867 [i_next_switch], NULL, NULL, 1);
869 cycle_exists(p_lash->
870 cdg_vertex_matrix[v_lane][dest_switch]
871 [i_next_switch2], NULL, NULL, 1);
873 for (j = 0; j < num_switches; j++)
874 for (k = 0; k < num_switches; k++)
876 cdg_vertex_matrix[v_lane][j][k] !=
879 cdg_vertex_matrix[v_lane][j]
880 [k]->visiting_number = 0;
882 cdg_vertex_matrix[v_lane][j]
886 if (cycle_found == 1 || cycle_found2 == 1) {
887 remove_temp_depend_for_sp(p_lash, i, dest_switch,
889 remove_temp_depend_for_sp(p_lash, dest_switch, i,
893 set_temp_depend_to_permanent_for_sp(p_lash, i,
896 set_temp_depend_to_permanent_for_sp(p_lash,
900 p_lash->num_mst_in_lane[v_lane]++;
901 p_lash->num_mst_in_lane[v_lane]++;
905 switches[i]->routing_table[dest_switch].lane = v_lane + start_vl;
906 switches[dest_switch]->routing_table[i].lane = v_lane + start_vl;
908 if (cycle_found == 1 || cycle_found2 == 1) {
909 if (++lanes_needed > p_lash->vl_min)
910 goto Error_Not_Enough_Lanes;
912 if (generate_cdg_for_sp(p_lash, i, dest_switch, v_lane) ||
913 generate_cdg_for_sp(p_lash, dest_switch, i, v_lane)) {
914 OSM_LOG(p_log, OSM_LOG_ERROR,
915 "ERR 4D08: generate_cdg_for_sp failed\n");
919 set_temp_depend_to_permanent_for_sp(p_lash, i, dest_switch,
921 set_temp_depend_to_permanent_for_sp(p_lash, dest_switch, i,
924 p_lash->num_mst_in_lane[v_lane]++;
925 p_lash->num_mst_in_lane[v_lane]++;
927 p_lash->virtual_location[i][dest_switch][v_lane] = 1;
928 p_lash->virtual_location[dest_switch][i][v_lane] = 1;
930 switch_bitmap[i * num_switches + dest_switch] = 1;
931 switch_bitmap[dest_switch * num_switches + i] = 1;
935 for (i = 0; i < lanes_needed; i++)
936 OSM_LOG(p_log, OSM_LOG_INFO, "Lanes in layer %d: %d\n",
937 i, p_lash->num_mst_in_lane[i]);
939 OSM_LOG(p_log, OSM_LOG_INFO,
940 "Lanes needed: %d, Balancing\n", lanes_needed);
942 if (balance_virtual_lanes(p_lash, lanes_needed)) {
943 OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D09: Balancing failed\n");
947 for (i = 0; i < lanes_needed; i++)
948 OSM_LOG(p_log, OSM_LOG_INFO, "Lanes in layer %d: %d\n",
949 i, p_lash->num_mst_in_lane[i]);
954 Error_Not_Enough_Lanes:
955 OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D02: "
956 "Lane requirements (%d) exceed available lanes (%d)"
957 " with starting lane (%d)\n",
958 lanes_needed, p_lash->vl_min, start_vl);
966 static unsigned get_lash_id(osm_switch_t * p_sw)
968 return ((switch_t *) p_sw->priv)->id;
971 static int get_next_port(switch_t *sw, int link)
973 link_t *l = sw->node->links[link];
974 int port = l->next_port++;
977 * note if not doing mesh analysis
978 * then num_ports is always 1
980 if (l->next_port >= l->num_ports)
983 return l->ports[port];
986 static void populate_fwd_tbls(lash_t * p_lash)
988 osm_log_t *p_log = &p_lash->p_osm->log;
989 osm_subn_t *p_subn = &p_lash->p_osm->subn;
990 osm_switch_t *p_sw, *p_next_sw, *p_dst_sw;
992 uint16_t max_lid_ho, lid;
994 OSM_LOG_ENTER(p_log);
996 p_next_sw = (osm_switch_t *) cl_qmap_head(&p_subn->sw_guid_tbl);
998 /* Go through each switch individually */
999 while (p_next_sw != (osm_switch_t *) cl_qmap_end(&p_subn->sw_guid_tbl)) {
1000 uint64_t current_guid;
1003 p_next_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item);
1005 max_lid_ho = p_sw->max_lid_ho;
1006 current_guid = p_sw->p_node->node_info.port_guid;
1009 memset(p_sw->new_lft, OSM_NO_PATH, p_sw->lft_size);
1011 for (lid = 1; lid <= max_lid_ho; lid++) {
1012 port = osm_get_port_by_lid_ho(p_subn, lid);
1016 p_dst_sw = get_osm_switch_from_port(port);
1017 if (p_dst_sw == p_sw) {
1018 uint8_t egress_port = port->p_node->sw ? 0 :
1019 port->p_physp->p_remote_physp->port_num;
1020 p_sw->new_lft[lid] = egress_port;
1021 OSM_LOG(p_log, OSM_LOG_VERBOSE,
1022 "LASH fwd MY SRC SRC GUID 0x%016" PRIx64
1023 " src lash id (%d), src lid no (%u) src lash port (%d) "
1024 "DST GUID 0x%016" PRIx64
1025 " src lash id (%d), src lash port (%d)\n",
1026 cl_ntoh64(current_guid), -1, lid,
1027 egress_port, cl_ntoh64(current_guid),
1029 } else if (p_dst_sw) {
1030 unsigned dst_lash_switch_id =
1031 get_lash_id(p_dst_sw);
1032 uint8_t lash_egress_port =
1034 routing_table[dst_lash_switch_id].out_link;
1035 uint8_t physical_egress_port =
1036 get_next_port(sw, lash_egress_port);
1038 p_sw->new_lft[lid] = physical_egress_port;
1039 OSM_LOG(p_log, OSM_LOG_VERBOSE,
1040 "LASH fwd SRC GUID 0x%016" PRIx64
1041 " src lash id (%d), "
1042 "src lid no (%u) src lash port (%d) "
1043 "DST GUID 0x%016" PRIx64
1044 " src lash id (%d), src lash port (%d)\n",
1045 cl_ntoh64(current_guid), sw->id, lid,
1047 cl_ntoh64(p_dst_sw->p_node->node_info.
1050 physical_egress_port);
1054 OSM_LOG_EXIT(p_log);
1057 static void osm_lash_process_switch(lash_t * p_lash, osm_switch_t * p_sw)
1059 osm_log_t *p_log = &p_lash->p_osm->log;
1061 osm_physp_t *p_current_physp, *p_remote_physp;
1062 unsigned switch_a_lash_id, switch_b_lash_id;
1064 OSM_LOG_ENTER(p_log);
1066 switch_a_lash_id = get_lash_id(p_sw);
1067 port_count = osm_node_get_num_physp(p_sw->p_node);
1069 /* starting at port 1, ignoring management port on switch */
1070 for (i = 1; i < port_count; i++) {
1072 p_current_physp = osm_node_get_physp_ptr(p_sw->p_node, i);
1073 if (p_current_physp) {
1074 p_remote_physp = p_current_physp->p_remote_physp;
1075 if (p_remote_physp && p_remote_physp->p_node->sw) {
1076 int physical_port_a_num =
1077 osm_physp_get_port_num(p_current_physp);
1078 int physical_port_b_num =
1079 osm_physp_get_port_num(p_remote_physp);
1081 get_lash_id(p_remote_physp->p_node->sw);
1083 connect_switches(p_lash, switch_a_lash_id,
1085 physical_port_a_num);
1086 OSM_LOG(p_log, OSM_LOG_VERBOSE,
1087 "LASH SUCCESS connected G 0x%016" PRIx64
1088 " , lash_id(%u), P(%u) " " to G 0x%016"
1089 PRIx64 " , lash_id(%u) , P(%u)\n",
1090 cl_ntoh64(osm_physp_get_port_guid
1092 switch_a_lash_id, physical_port_a_num,
1093 cl_ntoh64(osm_physp_get_port_guid
1095 switch_b_lash_id, physical_port_b_num);
1100 OSM_LOG_EXIT(p_log);
1103 static void lash_cleanup(lash_t * p_lash)
1105 osm_subn_t *p_subn = &p_lash->p_osm->subn;
1106 osm_switch_t *p_next_sw, *p_sw;
1108 /* drop any existing references to old lash switches */
1109 p_next_sw = (osm_switch_t *) cl_qmap_head(&p_subn->sw_guid_tbl);
1110 while (p_next_sw != (osm_switch_t *) cl_qmap_end(&p_subn->sw_guid_tbl)) {
1112 p_next_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item);
1116 if (p_lash->switches) {
1118 for (id = 0; ((int)id) < p_lash->num_switches; id++)
1119 if (p_lash->switches[id])
1120 switch_delete(p_lash, p_lash->switches[id]);
1121 free(p_lash->switches);
1123 p_lash->switches = NULL;
1127 static int discover_network_properties()
1128 Traverse the topology of the network in order to determine
1129 - the maximum number of switches,
1130 - the minimum number of virtual layers
1133 static int discover_network_properties(lash_t * p_lash)
1137 osm_subn_t *p_subn = &p_lash->p_osm->subn;
1138 osm_switch_t *p_next_sw, *p_sw;
1139 osm_log_t *p_log = &p_lash->p_osm->log;
1141 p_lash->num_switches = cl_qmap_count(&p_subn->sw_guid_tbl);
1143 p_lash->switches = calloc(p_lash->num_switches, sizeof(switch_t *));
1144 if (!p_lash->switches)
1147 vl_min = 5; /* set to a high value */
1149 p_next_sw = (osm_switch_t *) cl_qmap_head(&p_subn->sw_guid_tbl);
1150 while (p_next_sw != (osm_switch_t *) cl_qmap_end(&p_subn->sw_guid_tbl)) {
1151 uint16_t port_count;
1153 p_next_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item);
1155 p_lash->switches[id] = switch_create(p_lash, id, p_sw);
1156 if (!p_lash->switches[id])
1160 port_count = osm_node_get_num_physp(p_sw->p_node);
1162 /* Note, ignoring port 0. management port */
1163 for (i = 1; i < port_count; i++) {
1164 osm_physp_t *p_current_physp =
1165 osm_node_get_physp_ptr(p_sw->p_node, i);
1168 && p_current_physp->p_remote_physp) {
1170 ib_port_info_t *p_port_info =
1171 &p_current_physp->port_info;
1172 uint8_t port_vl_min =
1173 ib_port_info_get_op_vls(p_port_info);
1174 if (port_vl_min && port_vl_min < vl_min)
1175 vl_min = port_vl_min;
1180 vl_min = 1 << (vl_min - 1);
1184 if (p_lash->p_osm->subn.opt.lash_start_vl >= vl_min) {
1185 OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D03: "
1186 "Start VL(%d) too high for min operational vl(%d)\n",
1187 p_lash->p_osm->subn.opt.lash_start_vl, vl_min);
1191 p_lash->vl_min = vl_min - p_lash->p_osm->subn.opt.lash_start_vl;
1193 OSM_LOG(p_log, OSM_LOG_INFO,
1194 "min operational vl(%d) start vl(%d) max_switches(%d)\n",
1195 p_lash->vl_min, p_lash->p_osm->subn.opt.lash_start_vl,
1196 p_lash->num_switches);
1200 static void process_switches(lash_t * p_lash)
1202 osm_switch_t *p_sw, *p_next_sw;
1203 osm_subn_t *p_subn = &p_lash->p_osm->subn;
1205 /* Go through each switch and process it, i.e. build the connection
1206 structure required by LASH */
1207 p_next_sw = (osm_switch_t *) cl_qmap_head(&p_subn->sw_guid_tbl);
1208 while (p_next_sw != (osm_switch_t *) cl_qmap_end(&p_subn->sw_guid_tbl)) {
1210 p_next_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item);
1212 osm_lash_process_switch(p_lash, p_sw);
1216 static int lash_process(void *context)
1218 lash_t *p_lash = context;
1219 osm_log_t *p_log = &p_lash->p_osm->log;
1222 OSM_LOG_ENTER(p_log);
1224 p_lash->balance_limit = 6;
1226 /* everything starts here */
1227 lash_cleanup(p_lash);
1229 status = discover_network_properties(p_lash);
1233 status = init_lash_structures(p_lash);
1237 process_switches(p_lash);
1239 status = lash_core(p_lash);
1243 populate_fwd_tbls(p_lash);
1247 free_lash_structures(p_lash);
1248 OSM_LOG_EXIT(p_log);
1253 static lash_t *lash_create(osm_opensm_t * p_osm)
1257 p_lash = calloc(1, sizeof(lash_t));
1261 p_lash->p_osm = p_osm;
1266 static void lash_delete(void *context)
1268 lash_t *p_lash = context;
1270 if (p_lash->switches) {
1272 for (id = 0; ((int)id) < p_lash->num_switches; id++)
1273 if (p_lash->switches[id])
1274 switch_delete(p_lash, p_lash->switches[id]);
1275 free(p_lash->switches);
1281 static uint8_t get_lash_sl(void *context, uint8_t path_sl_hint,
1282 const ib_net16_t slid, const ib_net16_t dlid)
1286 osm_port_t *p_src_port, *p_dst_port;
1288 lash_t *p_lash = context;
1289 osm_opensm_t *p_osm = p_lash->p_osm;
1291 if (!(p_osm->routing_engine_used &&
1292 p_osm->routing_engine_used->type == OSM_ROUTING_ENGINE_TYPE_LASH))
1293 return OSM_DEFAULT_SL;
1295 p_src_port = osm_get_port_by_lid(&p_osm->subn, slid);
1297 return OSM_DEFAULT_SL;
1299 p_dst_port = osm_get_port_by_lid(&p_osm->subn, dlid);
1301 return OSM_DEFAULT_SL;
1303 p_sw = get_osm_switch_from_port(p_dst_port);
1304 if (!p_sw || !p_sw->priv)
1305 return OSM_DEFAULT_SL;
1306 dst_id = get_lash_id(p_sw);
1308 p_sw = get_osm_switch_from_port(p_src_port);
1309 if (!p_sw || !p_sw->priv)
1310 return OSM_DEFAULT_SL;
1312 src_id = get_lash_id(p_sw);
1313 if (src_id == dst_id)
1314 return p_osm->subn.opt.lash_start_vl;
1316 return (uint8_t) ((switch_t *) p_sw->priv)->routing_table[dst_id].lane;
1319 int osm_ucast_lash_setup(struct osm_routing_engine *r, osm_opensm_t *p_osm)
1321 lash_t *p_lash = lash_create(p_osm);
1325 r->context = p_lash;
1326 r->ucast_build_fwd_tables = lash_process;
1327 r->path_sl = get_lash_sl;
1328 r->destroy = lash_delete;