2 * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved.
4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5 * Copyright (c) 2007 Simula Research Laboratory. All rights reserved.
6 * Copyright (c) 2007 Silicon Graphics Inc. All rights reserved.
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
40 * Implementation of LASH algorithm Calculation functions
45 #endif /* HAVE_CONFIG_H */
50 #include <complib/cl_debug.h>
51 #include <complib/cl_qmap.h>
52 #include <opensm/osm_switch.h>
53 #include <opensm/osm_opensm.h>
54 #include <opensm/osm_log.h>
56 /* //////////////////////////// */
58 /* //////////////////////////// */
68 typedef struct _cdg_vertex {
70 struct _cdg_vertex **dependency;
76 struct _cdg_vertex *next;
79 int *num_using_this_depend;
82 typedef struct _reachable_dest {
84 struct _reachable_dest *next;
87 typedef struct _switch {
93 struct routing_table {
97 unsigned int num_connections;
98 int *virtual_physical_port_table;
99 int *phys_connections;
102 typedef struct _lash {
108 cdg_vertex_t ****cdg_vertex_matrix;
109 int *num_mst_in_lane;
110 int ***virtual_location;
113 static cdg_vertex_t *create_cdg_vertex(unsigned num_switches)
115 cdg_vertex_t *cdg_vertex = (cdg_vertex_t *) malloc(sizeof(cdg_vertex_t));
117 cdg_vertex->dependency = malloc((num_switches - 1) * sizeof(cdg_vertex_t *));
118 cdg_vertex->num_using_this_depend = (int *)malloc((num_switches - 1) * sizeof(int));
122 static void connect_switches(lash_t * p_lash, int sw1, int sw2, int phy_port_1)
124 osm_log_t *p_log = &p_lash->p_osm->log;
125 unsigned num = p_lash->switches[sw1]->num_connections;
127 p_lash->switches[sw1]->phys_connections[num] = sw2;
128 p_lash->switches[sw1]->virtual_physical_port_table[num] = phy_port_1;
129 p_lash->switches[sw1]->num_connections++;
131 OSM_LOG(p_log, OSM_LOG_VERBOSE,
132 "LASH connect: %d, %d, %d\n", sw1, sw2,
137 static osm_switch_t *get_osm_switch_from_port(osm_port_t * port)
139 osm_physp_t *p = port->p_physp;
141 return p->p_node->sw;
142 else if (p->p_remote_physp->p_node->sw)
143 return p->p_remote_physp->p_node->sw;
148 static int randint(int high)
160 static int cycle_exists(cdg_vertex_t * start, cdg_vertex_t * current,
161 cdg_vertex_t * prev, int visit_num)
164 int i, new_visit_num;
167 if (current != NULL && current->visiting_number > 0) {
168 if (visit_num > current->visiting_number && current->seen == 0) {
173 if (current == NULL) {
175 CL_ASSERT(prev == NULL);
178 current->visiting_number = visit_num;
181 prev->next = current;
182 CL_ASSERT(prev->to == current->from);
183 CL_ASSERT(prev->visiting_number > 0);
186 new_visit_num = visit_num + 1;
188 for (i = 0; i < current->num_dependencies; i++) {
190 cycle_exists(start, current->dependency[i], current,
192 if (cycle_found == 1)
193 i = current->num_dependencies;
204 static void remove_semipermanent_depend_for_sp(lash_t * p_lash, int sw,
205 int dest_switch, int lane)
207 switch_t **switches = p_lash->switches;
208 cdg_vertex_t ****cdg_vertex_matrix = p_lash->cdg_vertex_matrix;
209 int i_next_switch, output_link, i, next_link, i_next_next_switch,
214 output_link = switches[sw]->routing_table[dest_switch].out_link;
215 i_next_switch = switches[sw]->phys_connections[output_link];
217 while (sw != dest_switch) {
218 v = cdg_vertex_matrix[lane][sw][i_next_switch];
219 CL_ASSERT(v != NULL);
221 if (v->num_using_vertex == 1) {
223 cdg_vertex_matrix[lane][sw][i_next_switch] = NULL;
227 v->num_using_vertex--;
228 if (i_next_switch != dest_switch) {
230 switches[i_next_switch]->routing_table[dest_switch].out_link;
232 switches[i_next_switch]->phys_connections[next_link];
235 for (i = 0; i < v->num_dependencies; i++)
236 if (v->dependency[i] ==
237 cdg_vertex_matrix[lane][i_next_switch]
238 [i_next_next_switch]) {
245 if (v->num_using_this_depend[depend] == 1) {
247 i < v->num_dependencies - 1; i++) {
249 v->dependency[i + 1];
250 v->num_using_this_depend[i] =
251 v->num_using_this_depend[i +
255 v->num_dependencies--;
257 v->num_using_this_depend[depend]--;
262 output_link = switches[sw]->routing_table[dest_switch].out_link;
264 if (sw != dest_switch)
266 switches[sw]->phys_connections[output_link];
270 inline static void enqueue(cl_list_t * bfsq, switch_t * sw)
272 CL_ASSERT(sw->q_state == UNQUEUED);
273 sw->q_state = Q_MEMBER;
274 cl_list_insert_tail(bfsq, sw);
277 inline static void dequeue(cl_list_t * bfsq, switch_t ** sw)
279 *sw = (switch_t *) cl_list_remove_head(bfsq);
280 CL_ASSERT((*sw)->q_state == Q_MEMBER);
281 (*sw)->q_state = MST_MEMBER;
284 static int get_phys_connection(switch_t *sw, int switch_to)
288 for (i = 0; i < sw->num_connections; i++)
289 if (sw->phys_connections[i] == switch_to)
294 static void shortest_path(lash_t * p_lash, int ir)
296 switch_t **switches = p_lash->switches, *sw, *swi;
300 cl_list_construct(&bfsq);
301 cl_list_init(&bfsq, 20);
303 enqueue(&bfsq, switches[ir]);
305 while (!cl_is_list_empty(&bfsq)) {
307 for (i = 0; i < sw->num_connections; i++) {
308 swi = switches[sw->phys_connections[i]];
309 if (swi->q_state == UNQUEUED) {
311 sw->dij_channels[sw->used_channels++] = swi->id;
316 cl_list_destroy(&bfsq);
319 static void generate_routing_func_for_mst(lash_t * p_lash, int sw_id,
320 reachable_dest_t ** destinations)
323 switch_t *sw = p_lash->switches[sw_id];
324 int num_channels = sw->used_channels;
325 reachable_dest_t *dest, *i_dest, *concat_dest = NULL, *prev;
327 for (i = 0; i < num_channels; i++) {
328 next_switch = sw->dij_channels[i];
329 generate_routing_func_for_mst(p_lash, next_switch, &dest);
334 while (i_dest != NULL) {
335 if (sw->routing_table[i_dest->switch_id].out_link ==
337 sw->routing_table[i_dest->switch_id].out_link =
338 get_phys_connection(sw, next_switch);
342 i_dest = i_dest->next;
345 CL_ASSERT(prev->next == NULL);
346 prev->next = concat_dest;
350 i_dest = (reachable_dest_t *) malloc(sizeof(reachable_dest_t));
351 i_dest->switch_id = sw->id;
352 i_dest->next = concat_dest;
353 *destinations = i_dest;
356 static void generate_cdg_for_sp(lash_t * p_lash, int sw, int dest_switch,
359 unsigned num_switches = p_lash->num_switches;
360 switch_t **switches = p_lash->switches;
361 cdg_vertex_t ****cdg_vertex_matrix = p_lash->cdg_vertex_matrix;
362 int next_switch, output_link, j, exists;
363 cdg_vertex_t *v, *prev = NULL;
365 output_link = switches[sw]->routing_table[dest_switch].out_link;
366 next_switch = switches[sw]->phys_connections[output_link];
368 while (sw != dest_switch) {
370 if (cdg_vertex_matrix[lane][sw][next_switch] == NULL) {
372 v = create_cdg_vertex(num_switches);
374 for (i = 0; i < num_switches - 1; i++) {
375 v->dependency[i] = NULL;
376 v->num_using_this_depend[i] = 0;
379 v->num_using_vertex = 0;
380 v->num_dependencies = 0;
384 v->visiting_number = 0;
387 v->num_temp_depend = 0;
389 cdg_vertex_matrix[lane][sw][next_switch] = v;
391 v = cdg_vertex_matrix[lane][sw][next_switch];
393 v->num_using_vertex++;
398 for (j = 0; j < prev->num_dependencies; j++)
399 if (prev->dependency[j] == v) {
401 prev->num_using_this_depend[j]++;
405 prev->dependency[prev->num_dependencies] = v;
406 prev->num_using_this_depend[prev->num_dependencies]++;
407 prev->num_dependencies++;
409 CL_ASSERT(prev->num_dependencies < (int)num_switches);
412 prev->num_temp_depend++;
418 output_link = switches[sw]->routing_table[dest_switch].out_link;
420 if (sw != dest_switch) {
421 CL_ASSERT(output_link != NONE);
422 next_switch = switches[sw]->phys_connections[output_link];
429 static void set_temp_depend_to_permanent_for_sp(lash_t * p_lash, int sw,
430 int dest_switch, int lane)
432 switch_t **switches = p_lash->switches;
433 cdg_vertex_t ****cdg_vertex_matrix = p_lash->cdg_vertex_matrix;
434 int next_switch, output_link;
437 output_link = switches[sw]->routing_table[dest_switch].out_link;
438 next_switch = switches[sw]->phys_connections[output_link];
440 while (sw != dest_switch) {
441 v = cdg_vertex_matrix[lane][sw][next_switch];
442 CL_ASSERT(v != NULL);
447 v->num_temp_depend = 0;
450 output_link = switches[sw]->routing_table[dest_switch].out_link;
452 if (sw != dest_switch)
454 switches[sw]->phys_connections[output_link];
459 static void remove_temp_depend_for_sp(lash_t * p_lash, int sw, int dest_switch,
462 switch_t **switches = p_lash->switches;
463 cdg_vertex_t ****cdg_vertex_matrix = p_lash->cdg_vertex_matrix;
464 int next_switch, output_link, i;
467 output_link = switches[sw]->routing_table[dest_switch].out_link;
468 next_switch = switches[sw]->phys_connections[output_link];
470 while (sw != dest_switch) {
471 v = cdg_vertex_matrix[lane][sw][next_switch];
472 CL_ASSERT(v != NULL);
475 cdg_vertex_matrix[lane][sw][next_switch] = NULL;
478 CL_ASSERT(v->num_temp_depend <= v->num_dependencies);
479 v->num_dependencies =
480 v->num_dependencies - v->num_temp_depend;
481 v->num_temp_depend = 0;
482 v->num_using_vertex--;
484 for (i = v->num_dependencies;
485 i < p_lash->num_switches - 1; i++)
486 v->num_using_this_depend[i] = 0;
490 output_link = switches[sw]->routing_table[dest_switch].out_link;
492 if (sw != dest_switch)
494 switches[sw]->phys_connections[output_link];
499 static void balance_virtual_lanes(lash_t * p_lash, unsigned lanes_needed)
501 unsigned num_switches = p_lash->num_switches;
502 cdg_vertex_t ****cdg_vertex_matrix = p_lash->cdg_vertex_matrix;
503 int *num_mst_in_lane = p_lash->num_mst_in_lane;
504 int ***virtual_location = p_lash->virtual_location;
505 int min_filled_lane, max_filled_lane, medium_filled_lane, trials;
506 int old_min_filled_lane, old_max_filled_lane, new_num_min_lane,
509 int src, dest, start, next_switch, output_link;
510 int next_switch2, output_link2;
511 int stop = 0, cycle_found;
515 min_filled_lane = lanes_needed - 1;
517 if (max_filled_lane > 1)
518 medium_filled_lane = max_filled_lane - 1;
520 trials = num_mst_in_lane[max_filled_lane];
521 if (lanes_needed == 1)
525 src = abs(rand()) % (num_switches);
526 dest = abs(rand()) % (num_switches);
528 while (virtual_location[src][dest][max_filled_lane] != 1) {
530 if (dest == num_switches - 1)
536 && virtual_location[src][dest][max_filled_lane]
538 if (dest == num_switches - 1)
544 if (virtual_location[src][dest][max_filled_lane] != 1) {
545 if (src == num_switches - 1)
552 generate_cdg_for_sp(p_lash, src, dest, min_filled_lane);
553 generate_cdg_for_sp(p_lash, dest, src, min_filled_lane);
555 output_link = p_lash->switches[src]->routing_table[dest].out_link;
556 next_switch = p_lash->switches[src]->phys_connections[output_link];
558 output_link2 = p_lash->switches[dest]->routing_table[src].out_link;
559 next_switch2 = p_lash->switches[dest]->phys_connections[output_link2];
561 CL_ASSERT(cdg_vertex_matrix[min_filled_lane][src][next_switch] != NULL);
562 CL_ASSERT(cdg_vertex_matrix[min_filled_lane][dest][next_switch2] != NULL);
565 cycle_exists(cdg_vertex_matrix[min_filled_lane][src][next_switch], NULL, NULL,
568 cycle_exists(cdg_vertex_matrix[min_filled_lane][dest][next_switch2], NULL, NULL,
571 for (i = 0; i < num_switches; i++)
572 for (j = 0; j < num_switches; j++)
573 if (cdg_vertex_matrix[min_filled_lane][i][j] != NULL) {
574 cdg_vertex_matrix[min_filled_lane][i][j]->visiting_number =
576 cdg_vertex_matrix[min_filled_lane][i][j]->seen = 0;
579 if (cycle_found == 1 || cycle_found2 == 1) {
580 remove_temp_depend_for_sp(p_lash, src, dest, min_filled_lane);
581 remove_temp_depend_for_sp(p_lash, dest, src, min_filled_lane);
583 virtual_location[src][dest][max_filled_lane] = 2;
584 virtual_location[dest][src][max_filled_lane] = 2;
588 set_temp_depend_to_permanent_for_sp(p_lash, src, dest, min_filled_lane);
589 set_temp_depend_to_permanent_for_sp(p_lash, dest, src, min_filled_lane);
591 num_mst_in_lane[max_filled_lane]--;
592 num_mst_in_lane[max_filled_lane]--;
593 num_mst_in_lane[min_filled_lane]++;
594 num_mst_in_lane[min_filled_lane]++;
596 remove_semipermanent_depend_for_sp(p_lash, src, dest, max_filled_lane);
597 remove_semipermanent_depend_for_sp(p_lash, dest, src, max_filled_lane);
598 virtual_location[src][dest][max_filled_lane] = 0;
599 virtual_location[dest][src][max_filled_lane] = 0;
600 virtual_location[src][dest][min_filled_lane] = 1;
601 virtual_location[dest][src][min_filled_lane] = 1;
602 p_lash->switches[src]->routing_table[dest].lane = min_filled_lane;
603 p_lash->switches[dest]->routing_table[src].lane = min_filled_lane;
609 if (num_mst_in_lane[max_filled_lane] - num_mst_in_lane[min_filled_lane] <
610 p_lash->balance_limit)
614 old_min_filled_lane = min_filled_lane;
615 old_max_filled_lane = max_filled_lane;
617 new_num_min_lane = MAX_INT;
618 new_num_max_lane = 0;
620 for (i = 0; i < lanes_needed; i++) {
622 if (num_mst_in_lane[i] < new_num_min_lane) {
623 new_num_min_lane = num_mst_in_lane[i];
627 if (num_mst_in_lane[i] > new_num_max_lane) {
628 new_num_max_lane = num_mst_in_lane[i];
633 if (old_min_filled_lane != min_filled_lane) {
634 trials = num_mst_in_lane[max_filled_lane];
635 for (i = 0; i < num_switches; i++)
636 for (j = 0; j < num_switches; j++)
637 if (virtual_location[i][j][max_filled_lane] == 2)
638 virtual_location[i][j][max_filled_lane] = 1;
641 if (old_max_filled_lane != max_filled_lane) {
642 trials = num_mst_in_lane[max_filled_lane];
643 for (i = 0; i < num_switches; i++)
644 for (j = 0; j < num_switches; j++)
645 if (virtual_location[i][j][old_max_filled_lane] == 2)
646 virtual_location[i][j][old_max_filled_lane] = 1;
651 static switch_t *switch_create(lash_t * p_lash, unsigned id, osm_switch_t * p_sw)
653 unsigned num_switches = p_lash->num_switches;
654 unsigned num_ports = p_sw->num_ports;
658 sw = malloc(sizeof(*sw));
662 memset(sw, 0, sizeof(*sw));
665 sw->dij_channels = malloc(num_ports * sizeof(int));
666 if (!sw->dij_channels) {
671 sw->virtual_physical_port_table = malloc(num_ports * sizeof(int));
672 if (!sw->virtual_physical_port_table) {
673 free(sw->dij_channels);
678 sw->phys_connections = malloc(num_ports * sizeof(int));
679 if (!sw->phys_connections) {
680 free(sw->virtual_physical_port_table);
681 free(sw->dij_channels);
686 sw->routing_table = malloc(num_switches * sizeof(sw->routing_table[0]));
687 if (!sw->routing_table) {
688 free(sw->phys_connections);
689 free(sw->virtual_physical_port_table);
690 free(sw->dij_channels);
695 for (i = 0; i < num_switches; i++) {
696 sw->routing_table[i].out_link = NONE;
697 sw->routing_table[i].lane = NONE;
700 for (i = 0; i < num_ports; i++) {
701 sw->virtual_physical_port_table[i] = -1;
702 sw->phys_connections[i] = NONE;
712 static void switch_delete(switch_t * sw)
714 if (sw->dij_channels)
715 free(sw->dij_channels);
716 if (sw->virtual_physical_port_table)
717 free(sw->virtual_physical_port_table);
718 if (sw->phys_connections)
719 free(sw->phys_connections);
720 if (sw->routing_table)
721 free(sw->routing_table);
725 static void free_lash_structures(lash_t * p_lash)
727 unsigned int i, j, k;
728 unsigned num_switches = p_lash->num_switches;
729 osm_log_t *p_log = &p_lash->p_osm->log;
731 OSM_LOG_ENTER(p_log);
733 // free cdg_vertex_matrix
734 for (i = 0; i < p_lash->vl_min; i++) {
735 for (j = 0; j < num_switches; j++) {
736 for (k = 0; k < num_switches; k++) {
737 if (p_lash->cdg_vertex_matrix[i][j][k]) {
739 if (p_lash->cdg_vertex_matrix[i][j][k]->dependency)
740 free(p_lash->cdg_vertex_matrix[i][j][k]->
743 if (p_lash->cdg_vertex_matrix[i][j][k]->
744 num_using_this_depend)
745 free(p_lash->cdg_vertex_matrix[i][j][k]->
746 num_using_this_depend);
748 free(p_lash->cdg_vertex_matrix[i][j][k]);
751 if (p_lash->cdg_vertex_matrix[i][j])
752 free(p_lash->cdg_vertex_matrix[i][j]);
754 if (p_lash->cdg_vertex_matrix[i])
755 free(p_lash->cdg_vertex_matrix[i]);
758 if (p_lash->cdg_vertex_matrix)
759 free(p_lash->cdg_vertex_matrix);
761 // free virtual_location
762 for (i = 0; i < num_switches; i++) {
763 for (j = 0; j < num_switches; j++) {
764 if (p_lash->virtual_location[i][j])
765 free(p_lash->virtual_location[i][j]);
767 if (p_lash->virtual_location[i])
768 free(p_lash->virtual_location[i]);
770 if (p_lash->virtual_location)
771 free(p_lash->virtual_location);
773 if (p_lash->num_mst_in_lane)
774 free(p_lash->num_mst_in_lane);
779 static int init_lash_structures(lash_t * p_lash)
781 unsigned vl_min = p_lash->vl_min;
782 unsigned num_switches = p_lash->num_switches;
783 osm_log_t *p_log = &p_lash->p_osm->log;
785 unsigned int i, j, k;
787 OSM_LOG_ENTER(p_log);
789 // initialise cdg_vertex_matrix[num_switches][num_switches][num_switches]
790 p_lash->cdg_vertex_matrix =
791 (cdg_vertex_t ****) malloc(vl_min * sizeof(cdg_vertex_t ****));
792 for (i = 0; i < vl_min; i++) {
793 p_lash->cdg_vertex_matrix[i] =
794 (cdg_vertex_t ***) malloc(num_switches *
795 sizeof(cdg_vertex_t ***));
797 if (p_lash->cdg_vertex_matrix[i] == NULL)
801 for (i = 0; i < vl_min; i++) {
802 for (j = 0; j < num_switches; j++) {
803 p_lash->cdg_vertex_matrix[i][j] =
804 (cdg_vertex_t **) malloc(num_switches *
805 sizeof(cdg_vertex_t **));
806 if (p_lash->cdg_vertex_matrix[i][j] == NULL)
809 for (k = 0; k < num_switches; k++) {
810 p_lash->cdg_vertex_matrix[i][j][k] = NULL;
815 // initialise virtual_location[num_switches][num_switches][num_layers],
817 p_lash->virtual_location =
818 (int ***)malloc(num_switches * sizeof(int ***));
819 if (p_lash->virtual_location == NULL)
822 for (i = 0; i < num_switches; i++) {
823 p_lash->virtual_location[i] =
824 (int **)malloc(num_switches * sizeof(int **));
825 if (p_lash->virtual_location[i] == NULL)
829 for (i = 0; i < num_switches; i++) {
830 for (j = 0; j < num_switches; j++) {
831 p_lash->virtual_location[i][j] =
832 (int *)malloc(vl_min * sizeof(int *));
833 if (p_lash->virtual_location[i][j] == NULL)
835 for (k = 0; k < vl_min; k++) {
836 p_lash->virtual_location[i][j][k] = 0;
841 // initialise num_mst_in_lane[num_switches], default 0
842 p_lash->num_mst_in_lane = (int *)malloc(num_switches * sizeof(int));
843 if (p_lash->num_mst_in_lane == NULL)
845 memset(p_lash->num_mst_in_lane, 0,
846 num_switches * sizeof(p_lash->num_mst_in_lane[0]));
852 OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D01: "
853 "Could not allocate required memory for LASH errno %d, errno %d for lack of memory\n",
861 static int lash_core(lash_t * p_lash)
863 osm_log_t *p_log = &p_lash->p_osm->log;
864 unsigned num_switches = p_lash->num_switches;
865 switch_t **switches = p_lash->switches;
866 unsigned lanes_needed = 1;
867 unsigned int i, j, k, dest_switch = 0;
868 reachable_dest_t *dests, *idest;
871 int stop = 0, output_link, i_next_switch;
872 int output_link2, i_next_switch2;
873 int cycle_found2 = 0;
875 int *switch_bitmap; /* Bitmap to check if we have processed this pair */
877 OSM_LOG_ENTER(p_log);
879 for (i = 0; i < num_switches; i++) {
881 shortest_path(p_lash, i);
882 generate_routing_func_for_mst(p_lash, i, &dests);
885 while (idest != NULL) {
891 for (j = 0; j < num_switches; j++) {
892 switches[j]->used_channels = 0;
893 switches[j]->q_state = UNQUEUED;
897 switch_bitmap = malloc(num_switches * num_switches * sizeof(int));
898 if (!switch_bitmap) {
899 OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D04: "
900 "Failed allocating switch_bitmap - out of memory\n");
903 memset(switch_bitmap, 0, num_switches * num_switches * sizeof(int));
905 for (i = 0; i < num_switches; i++) {
906 for (dest_switch = 0; dest_switch < num_switches; dest_switch++)
907 if (dest_switch != i && switch_bitmap[i * num_switches + dest_switch] == 0) {
910 while (v_lane < lanes_needed && stop == 0) {
911 generate_cdg_for_sp(p_lash, i, dest_switch, v_lane);
912 generate_cdg_for_sp(p_lash, dest_switch, i, v_lane);
915 switches[i]->routing_table[dest_switch].out_link;
917 switches[dest_switch]->routing_table[i].out_link;
919 i_next_switch = switches[i]->phys_connections[output_link];
921 switches[dest_switch]->phys_connections[output_link2];
924 cdg_vertex_matrix[v_lane][i][i_next_switch] !=
927 cdg_vertex_matrix[v_lane][dest_switch]
928 [i_next_switch2] != NULL);
931 cycle_exists(p_lash->
932 cdg_vertex_matrix[v_lane][i]
933 [i_next_switch], NULL, NULL, 1);
935 cycle_exists(p_lash->
936 cdg_vertex_matrix[v_lane][dest_switch]
937 [i_next_switch2], NULL, NULL, 1);
939 for (j = 0; j < num_switches; j++)
940 for (k = 0; k < num_switches; k++)
942 cdg_vertex_matrix[v_lane][j][k] !=
945 cdg_vertex_matrix[v_lane][j]
946 [k]->visiting_number = 0;
948 cdg_vertex_matrix[v_lane][j]
952 if (cycle_found == 1 || cycle_found2 == 1) {
953 remove_temp_depend_for_sp(p_lash, i, dest_switch,
955 remove_temp_depend_for_sp(p_lash, dest_switch, i,
959 set_temp_depend_to_permanent_for_sp(p_lash, i,
962 set_temp_depend_to_permanent_for_sp(p_lash,
966 p_lash->num_mst_in_lane[v_lane]++;
967 p_lash->num_mst_in_lane[v_lane]++;
971 switches[i]->routing_table[dest_switch].lane = v_lane;
972 switches[dest_switch]->routing_table[i].lane = v_lane;
974 if (cycle_found == 1 || cycle_found2 == 1) {
975 if (++lanes_needed > p_lash->vl_min)
976 goto Error_Not_Enough_Lanes;
978 generate_cdg_for_sp(p_lash, i, dest_switch, v_lane);
979 generate_cdg_for_sp(p_lash, dest_switch, i, v_lane);
981 set_temp_depend_to_permanent_for_sp(p_lash, i, dest_switch,
983 set_temp_depend_to_permanent_for_sp(p_lash, dest_switch, i,
986 p_lash->num_mst_in_lane[v_lane]++;
987 p_lash->num_mst_in_lane[v_lane]++;
989 p_lash->virtual_location[i][dest_switch][v_lane] = 1;
990 p_lash->virtual_location[dest_switch][i][v_lane] = 1;
992 switch_bitmap[i * num_switches + dest_switch] = 1;
993 switch_bitmap[dest_switch * num_switches + i] = 1;
997 OSM_LOG(p_log, OSM_LOG_INFO,
998 "Lanes needed: %d, Balancing\n", lanes_needed);
1000 for (i = 0; i < lanes_needed; i++) {
1001 OSM_LOG(p_log, OSM_LOG_INFO, "Lanes in layer %d: %d\n",
1002 i, p_lash->num_mst_in_lane[i]);
1005 balance_virtual_lanes(p_lash, lanes_needed);
1007 for (i = 0; i < lanes_needed; i++) {
1008 OSM_LOG(p_log, OSM_LOG_INFO, "Lanes in layer %d: %d\n",
1009 i, p_lash->num_mst_in_lane[i]);
1014 Error_Not_Enough_Lanes:
1016 OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D02: "
1017 "Lane requirements (%d) exceed available lanes (%d)\n",
1018 p_lash->vl_min, lanes_needed);
1021 free(switch_bitmap);
1022 OSM_LOG_EXIT(p_log);
1026 static unsigned get_lash_id(osm_switch_t * p_sw)
1028 return ((switch_t *) p_sw->priv)->id;
1031 static void populate_fwd_tbls(lash_t * p_lash)
1033 osm_log_t *p_log = &p_lash->p_osm->log;
1034 osm_subn_t *p_subn = &p_lash->p_osm->subn;
1035 osm_opensm_t *p_osm = p_lash->p_osm;
1036 osm_switch_t *p_sw, *p_next_sw, *p_dst_sw;
1038 uint16_t max_lid_ho, lid;
1040 OSM_LOG_ENTER(p_log);
1042 p_next_sw = (osm_switch_t *) cl_qmap_head(&p_subn->sw_guid_tbl);
1044 // Go through each swtich individually
1045 while (p_next_sw != (osm_switch_t *) cl_qmap_end(&p_subn->sw_guid_tbl)) {
1046 uint64_t current_guid;
1049 p_next_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item);
1051 max_lid_ho = p_sw->max_lid_ho;
1052 current_guid = p_sw->p_node->node_info.port_guid;
1055 memset(p_sw->new_lft, OSM_NO_PATH, IB_LID_UCAST_END_HO + 1);
1057 for (lid = 1; lid <= max_lid_ho; lid++) {
1058 port = cl_ptr_vector_get(&p_subn->port_lid_tbl, lid);
1062 p_dst_sw = get_osm_switch_from_port(port);
1063 if (p_dst_sw == p_sw) {
1064 uint8_t egress_port = port->p_node->sw ? 0 :
1065 port->p_physp->p_remote_physp->port_num;
1066 p_sw->new_lft[lid] = egress_port;
1067 OSM_LOG(p_log, OSM_LOG_VERBOSE,
1068 "LASH fwd MY SRC SRC GUID 0x%016" PRIx64
1069 " src lash id (%d), src lid no (%u) src lash port (%d) "
1070 "DST GUID 0x%016" PRIx64
1071 " src lash id (%d), src lash port (%d)\n",
1072 cl_ntoh64(current_guid), -1, lid,
1073 egress_port, cl_ntoh64(current_guid),
1075 } else if (p_dst_sw) {
1076 unsigned dst_lash_switch_id =
1077 get_lash_id(p_dst_sw);
1078 uint8_t lash_egress_port =
1080 routing_table[dst_lash_switch_id].out_link;
1081 uint8_t physical_egress_port =
1083 virtual_physical_port_table
1086 p_sw->new_lft[lid] = physical_egress_port;
1087 OSM_LOG(p_log, OSM_LOG_VERBOSE,
1088 "LASH fwd SRC GUID 0x%016" PRIx64
1089 " src lash id (%d), "
1090 "src lid no (%u) src lash port (%d) "
1091 "DST GUID 0x%016" PRIx64
1092 " src lash id (%d), src lash port (%d)\n",
1093 cl_ntoh64(current_guid), sw->id, lid,
1095 cl_ntoh64(p_dst_sw->p_node->node_info.
1098 physical_egress_port);
1101 osm_ucast_mgr_set_fwd_table(&p_osm->sm.ucast_mgr, p_sw);
1103 OSM_LOG_EXIT(p_log);
1106 static void osm_lash_process_switch(lash_t * p_lash, osm_switch_t * p_sw)
1108 osm_log_t *p_log = &p_lash->p_osm->log;
1110 osm_physp_t *p_current_physp, *p_remote_physp;
1111 unsigned switch_a_lash_id, switch_b_lash_id;
1113 OSM_LOG_ENTER(p_log);
1115 switch_a_lash_id = get_lash_id(p_sw);
1116 port_count = osm_node_get_num_physp(p_sw->p_node);
1118 // starting at port 1, ignoring management port on switch
1119 for (i = 1; i < port_count; i++) {
1121 p_current_physp = osm_node_get_physp_ptr(p_sw->p_node, i);
1122 if (p_current_physp) {
1123 p_remote_physp = p_current_physp->p_remote_physp;
1124 if (p_remote_physp && p_remote_physp->p_node->sw) {
1125 int physical_port_a_num =
1126 osm_physp_get_port_num(p_current_physp);
1127 int physical_port_b_num =
1128 osm_physp_get_port_num(p_remote_physp);
1130 get_lash_id(p_remote_physp->p_node->sw);
1132 connect_switches(p_lash, switch_a_lash_id,
1134 physical_port_a_num);
1135 OSM_LOG(p_log, OSM_LOG_VERBOSE,
1136 "LASH SUCCESS connected G 0x%016" PRIx64
1137 " , lash_id(%u), P(%u) " " to G 0x%016"
1138 PRIx64 " , lash_id(%u) , P(%u)\n",
1139 cl_ntoh64(osm_physp_get_port_guid
1141 switch_a_lash_id, physical_port_a_num,
1142 cl_ntoh64(osm_physp_get_port_guid
1144 switch_b_lash_id, physical_port_b_num);
1149 OSM_LOG_EXIT(p_log);
1152 static void lash_cleanup(lash_t * p_lash)
1154 osm_subn_t *p_subn = &p_lash->p_osm->subn;
1155 osm_switch_t *p_next_sw, *p_sw;
1157 /* drop any existing references to old lash switches */
1158 p_next_sw = (osm_switch_t *) cl_qmap_head(&p_subn->sw_guid_tbl);
1159 while (p_next_sw != (osm_switch_t *) cl_qmap_end(&p_subn->sw_guid_tbl)) {
1161 p_next_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item);
1165 if (p_lash->switches) {
1167 for (id = 0; ((int)id) < p_lash->num_switches; id++)
1168 if (p_lash->switches[id])
1169 switch_delete(p_lash->switches[id]);
1170 free(p_lash->switches);
1172 p_lash->switches = NULL;
1176 static int discover_network_properties()
1177 Traverse the topology of the network in order to determine
1178 - the maximum number of switches,
1179 - the minimum number of virtual layers
1182 static int discover_network_properties(lash_t * p_lash)
1186 osm_subn_t *p_subn = &p_lash->p_osm->subn;
1187 osm_switch_t *p_next_sw, *p_sw;
1188 osm_log_t *p_log = &p_lash->p_osm->log;
1190 p_lash->num_switches = cl_qmap_count(&p_subn->sw_guid_tbl);
1192 p_lash->switches = malloc(p_lash->num_switches * sizeof(switch_t *));
1193 if (!p_lash->switches)
1195 memset(p_lash->switches, 0, p_lash->num_switches * sizeof(switch_t *));
1197 vl_min = 5; // set to a high value
1199 p_next_sw = (osm_switch_t *) cl_qmap_head(&p_subn->sw_guid_tbl);
1200 while (p_next_sw != (osm_switch_t *) cl_qmap_end(&p_subn->sw_guid_tbl)) {
1201 uint16_t port_count;
1203 p_next_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item);
1205 p_lash->switches[id] = switch_create(p_lash, id, p_sw);
1206 if (!p_lash->switches[id])
1210 port_count = osm_node_get_num_physp(p_sw->p_node);
1212 // Note, ignoring port 0. management port
1213 for (i = 1; i < port_count; i++) {
1214 osm_physp_t *p_current_physp =
1215 osm_node_get_physp_ptr(p_sw->p_node, i);
1218 && p_current_physp->p_remote_physp) {
1220 ib_port_info_t *p_port_info =
1221 &p_current_physp->port_info;
1222 uint8_t port_vl_min =
1223 ib_port_info_get_op_vls(p_port_info);
1224 if (port_vl_min && port_vl_min < vl_min)
1225 vl_min = port_vl_min;
1230 vl_min = 1 << (vl_min - 1);
1234 p_lash->vl_min = vl_min;
1236 OSM_LOG(p_log, OSM_LOG_INFO,
1237 "min operational vl(%d) max_switches(%d)\n", p_lash->vl_min,
1238 p_lash->num_switches);
1242 static void process_switches(lash_t * p_lash)
1244 osm_switch_t *p_sw, *p_next_sw;
1245 osm_subn_t *p_subn = &p_lash->p_osm->subn;
1247 /* Go through each swithc and process it. i.e build the connection
1248 structure required by LASH */
1249 p_next_sw = (osm_switch_t *) cl_qmap_head(&p_subn->sw_guid_tbl);
1250 while (p_next_sw != (osm_switch_t *) cl_qmap_end(&p_subn->sw_guid_tbl)) {
1252 p_next_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item);
1254 osm_lash_process_switch(p_lash, p_sw);
1258 static int lash_process(void *context)
1260 lash_t *p_lash = context;
1261 osm_log_t *p_log = &p_lash->p_osm->log;
1262 int return_status = IB_SUCCESS;
1264 OSM_LOG_ENTER(p_log);
1266 p_lash->balance_limit = 6;
1268 // everything starts here
1269 lash_cleanup(p_lash);
1271 discover_network_properties(p_lash);
1273 return_status = init_lash_structures(p_lash);
1274 if (return_status != IB_SUCCESS)
1277 process_switches(p_lash);
1279 return_status = lash_core(p_lash);
1280 if (return_status != IB_SUCCESS)
1283 populate_fwd_tbls(p_lash);
1286 free_lash_structures(p_lash);
1287 OSM_LOG_EXIT(p_log);
1289 return return_status;
1292 static lash_t *lash_create(osm_opensm_t * p_osm)
1296 p_lash = malloc(sizeof(lash_t));
1300 memset(p_lash, 0, sizeof(lash_t));
1301 p_lash->p_osm = p_osm;
1306 static void lash_delete(void *context)
1308 lash_t *p_lash = context;
1309 if (p_lash->switches) {
1311 for (id = 0; ((int)id) < p_lash->num_switches; id++)
1312 if (p_lash->switches[id])
1313 switch_delete(p_lash->switches[id]);
1314 free(p_lash->switches);
1319 uint8_t osm_get_lash_sl(osm_opensm_t * p_osm, osm_port_t * p_src_port,
1320 osm_port_t * p_dst_port)
1326 if (p_osm->routing_engine_used != OSM_ROUTING_ENGINE_TYPE_LASH)
1327 return OSM_DEFAULT_SL;
1329 p_sw = get_osm_switch_from_port(p_dst_port);
1330 if (!p_sw || !p_sw->priv)
1331 return OSM_DEFAULT_SL;
1332 dst_id = get_lash_id(p_sw);
1334 p_sw = get_osm_switch_from_port(p_src_port);
1335 if (!p_sw || !p_sw->priv)
1336 return OSM_DEFAULT_SL;
1338 src_id = get_lash_id(p_sw);
1339 if (src_id == dst_id)
1340 return OSM_DEFAULT_SL;
1342 return (uint8_t) ((switch_t *) p_sw->priv)->routing_table[dst_id].lane;
1345 int osm_ucast_lash_setup(struct osm_routing_engine *r, osm_opensm_t *p_osm)
1347 lash_t *p_lash = lash_create(p_osm);
1351 r->context = p_lash;
1352 r->ucast_build_fwd_tables = lash_process;
1353 r->delete = lash_delete;