2 * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved.
4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5 * Copyright (c) 2008 Xsigo Systems Inc. All rights reserved.
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
39 * Implementation of osm_mcast_mgr_t.
40 * This file implements the Multicast Manager object.
45 #endif /* HAVE_CONFIG_H */
49 #include <iba/ib_types.h>
50 #include <complib/cl_debug.h>
51 #include <opensm/osm_opensm.h>
52 #include <opensm/osm_sm.h>
53 #include <opensm/osm_multicast.h>
54 #include <opensm/osm_node.h>
55 #include <opensm/osm_switch.h>
56 #include <opensm/osm_helper.h>
57 #include <opensm/osm_msgdef.h>
59 /**********************************************************************
60 **********************************************************************/
61 typedef struct osm_mcast_work_obj {
62 cl_list_item_t list_item;
64 } osm_mcast_work_obj_t;
66 /**********************************************************************
67 **********************************************************************/
68 static osm_mcast_work_obj_t *__osm_mcast_work_obj_new(IN const osm_port_t *
72 TO DO - get these objects from a lockpool.
74 osm_mcast_work_obj_t *p_obj;
77 clean allocated memory to avoid assertion when trying to insert to
79 see cl_qlist_insert_tail(): CL_ASSERT(p_list_item->p_list != p_list)
81 p_obj = malloc(sizeof(*p_obj));
83 memset(p_obj, 0, sizeof(*p_obj));
84 p_obj->p_port = (osm_port_t *) p_port;
90 /**********************************************************************
91 **********************************************************************/
92 static void __osm_mcast_work_obj_delete(IN osm_mcast_work_obj_t * p_wobj)
97 /**********************************************************************
98 Recursively remove nodes from the tree
99 *********************************************************************/
100 static void __osm_mcast_mgr_purge_tree_node(IN osm_mtree_node_t * p_mtn)
104 for (i = 0; i < p_mtn->max_children; i++) {
105 if (p_mtn->child_array[i] &&
106 (p_mtn->child_array[i] != OSM_MTREE_LEAF))
107 __osm_mcast_mgr_purge_tree_node(p_mtn->child_array[i]);
109 p_mtn->child_array[i] = NULL;
116 /**********************************************************************
117 **********************************************************************/
119 __osm_mcast_mgr_purge_tree(osm_sm_t * sm, IN osm_mgrp_t * const p_mgrp)
121 OSM_LOG_ENTER(sm->p_log);
124 __osm_mcast_mgr_purge_tree_node(p_mgrp->p_root);
126 p_mgrp->p_root = NULL;
128 OSM_LOG_EXIT(sm->p_log);
131 /**********************************************************************
132 **********************************************************************/
134 osm_mcast_mgr_compute_avg_hops(osm_sm_t * sm,
135 const osm_mgrp_t * const p_mgrp,
136 const osm_switch_t * const p_sw)
140 uint32_t num_ports = 0;
141 const osm_port_t *p_port;
142 const osm_mcm_port_t *p_mcm_port;
143 const cl_qmap_t *p_mcm_tbl;
145 OSM_LOG_ENTER(sm->p_log);
147 p_mcm_tbl = &p_mgrp->mcm_port_tbl;
150 For each member of the multicast group, compute the
151 number of hops to its base LID.
153 for (p_mcm_port = (osm_mcm_port_t *) cl_qmap_head(p_mcm_tbl);
154 p_mcm_port != (osm_mcm_port_t *) cl_qmap_end(p_mcm_tbl);
156 (osm_mcm_port_t *) cl_qmap_next(&p_mcm_port->map_item)) {
158 Acquire the port object for this port guid, then create
159 the new worker object to build the list.
161 p_port = osm_get_port_by_guid(sm->p_subn,
162 ib_gid_get_guid(&p_mcm_port->
166 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A18: "
167 "No port object for port 0x%016" PRIx64 "\n",
168 cl_ntoh64(ib_gid_get_guid
169 (&p_mcm_port->port_gid)));
173 hops += osm_switch_get_port_least_hops(p_sw, p_port);
178 We should be here if there aren't any ports in the group.
180 CL_ASSERT(num_ports);
183 avg_hops = (float)(hops / num_ports);
185 OSM_LOG_EXIT(sm->p_log);
189 /**********************************************************************
190 Calculate the maximal "min hops" from the given switch to any
192 **********************************************************************/
194 osm_mcast_mgr_compute_max_hops(osm_sm_t * sm,
195 const osm_mgrp_t * const p_mgrp,
196 const osm_switch_t * const p_sw)
198 uint32_t max_hops = 0;
200 const osm_port_t *p_port;
201 const osm_mcm_port_t *p_mcm_port;
202 const cl_qmap_t *p_mcm_tbl;
204 OSM_LOG_ENTER(sm->p_log);
206 p_mcm_tbl = &p_mgrp->mcm_port_tbl;
209 For each member of the multicast group, compute the
210 number of hops to its base LID.
212 for (p_mcm_port = (osm_mcm_port_t *) cl_qmap_head(p_mcm_tbl);
213 p_mcm_port != (osm_mcm_port_t *) cl_qmap_end(p_mcm_tbl);
215 (osm_mcm_port_t *) cl_qmap_next(&p_mcm_port->map_item)) {
217 Acquire the port object for this port guid, then create
218 the new worker object to build the list.
220 p_port = osm_get_port_by_guid(sm->p_subn,
221 ib_gid_get_guid(&p_mcm_port->
225 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A1A: "
226 "No port object for port 0x%016" PRIx64 "\n",
227 cl_ntoh64(ib_gid_get_guid
228 (&p_mcm_port->port_gid)));
232 hops = osm_switch_get_port_least_hops(p_sw, p_port);
239 We should be here if there aren't any ports in the group.
241 max_hops = 10001; /* see later - we use it to realize no hops */
244 OSM_LOG_EXIT(sm->p_log);
245 return (float)(max_hops);
248 /**********************************************************************
249 This function attempts to locate the optimal switch for the
250 center of the spanning tree. The current algorithm chooses
251 a switch with the lowest average hop count to the members
252 of the multicast group.
253 **********************************************************************/
254 static osm_switch_t *__osm_mcast_mgr_find_optimal_switch(osm_sm_t * sm,
259 const osm_switch_t *p_sw;
260 const osm_switch_t *p_best_sw = NULL;
262 float best_hops = 10000; /* any big # will do */
263 #ifdef OSM_VENDOR_INTF_ANAFA
264 boolean_t use_avg_hops = TRUE; /* anafa2 - bug hca on switch *//* use max hops for root */
266 boolean_t use_avg_hops = FALSE; /* use max hops for root */
269 OSM_LOG_ENTER(sm->p_log);
271 p_sw_tbl = &sm->p_subn->sw_guid_tbl;
273 CL_ASSERT(!osm_mgrp_is_empty(p_mgrp));
275 for (p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl);
276 p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl);
277 p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) {
278 if (!osm_switch_supports_mcast(p_sw))
282 hops = osm_mcast_mgr_compute_avg_hops(sm, p_mgrp, p_sw);
284 hops = osm_mcast_mgr_compute_max_hops(sm, p_mgrp, p_sw);
286 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
287 "Switch 0x%016" PRIx64 ", hops = %f\n",
288 cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)), hops);
290 if (hops < best_hops) {
297 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
298 "Best switch is 0x%" PRIx64 ", hops = %f\n",
299 cl_ntoh64(osm_node_get_node_guid(p_best_sw->p_node)),
302 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
303 "No multicast capable switches detected\n");
305 OSM_LOG_EXIT(sm->p_log);
306 return ((osm_switch_t *) p_best_sw);
309 /**********************************************************************
310 This function returns the existing or optimal root switch for the tree.
311 **********************************************************************/
312 static osm_switch_t *__osm_mcast_mgr_find_root_switch(osm_sm_t * sm,
316 const osm_switch_t *p_sw = NULL;
318 OSM_LOG_ENTER(sm->p_log);
321 We always look for the best multicast tree root switch.
322 Otherwise since we always start with a a single join
323 the root will be always on the first switch attached to it.
326 p_sw = __osm_mcast_mgr_find_optimal_switch(sm, p_mgrp);
328 OSM_LOG_EXIT(sm->p_log);
329 return ((osm_switch_t *) p_sw);
332 /**********************************************************************
333 **********************************************************************/
335 __osm_mcast_mgr_set_tbl(osm_sm_t * sm, IN osm_switch_t * const p_sw)
338 osm_dr_path_t *p_path;
339 osm_madw_context_t mad_context;
340 ib_api_status_t status;
341 uint32_t block_id_ho = 0;
342 int16_t block_num = 0;
343 uint32_t position = 0;
344 uint32_t max_position;
345 osm_mcast_tbl_t *p_tbl;
346 ib_net16_t block[IB_MCAST_BLOCK_SIZE];
347 osm_signal_t signal = OSM_SIGNAL_DONE;
351 OSM_LOG_ENTER(sm->p_log);
355 p_node = p_sw->p_node;
359 p_path = osm_physp_get_dr_path_ptr(osm_node_get_physp_ptr(p_node, 0));
362 Send multicast forwarding table blocks to the switch
363 as long as the switch indicates it has blocks needing
367 mad_context.mft_context.node_guid = osm_node_get_node_guid(p_node);
368 mad_context.mft_context.set_method = TRUE;
370 p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw);
371 max_position = p_tbl->max_position;
373 while (osm_mcast_tbl_get_block(p_tbl, block_num,
374 (uint8_t) position, block)) {
375 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
376 "Writing MFT block 0x%X\n", block_id_ho);
378 block_id_ho = block_num + (position << 28);
380 status = osm_req_set(sm, p_path, (void *)block, sizeof(block),
381 IB_MAD_ATTR_MCAST_FWD_TBL,
382 cl_hton32(block_id_ho),
383 CL_DISP_MSGID_NONE, &mad_context);
385 if (status != IB_SUCCESS) {
386 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A02: "
387 "Sending multicast fwd. tbl. block failed (%s)\n",
388 ib_get_err_str(status));
391 signal = OSM_SIGNAL_DONE_PENDING;
393 if (++position > max_position) {
399 OSM_LOG_EXIT(sm->p_log);
403 /**********************************************************************
404 This is part of the recursive function to compute the paths in the
405 spanning tree that eminate from this switch. On input, the p_list
406 contains the group members that must be routed from this switch.
407 **********************************************************************/
409 __osm_mcast_mgr_subdivide(osm_sm_t * sm,
410 osm_mgrp_t * const p_mgrp,
411 osm_switch_t * const p_sw,
412 cl_qlist_t * const p_list,
413 cl_qlist_t * const list_array,
414 uint8_t const array_size)
418 boolean_t ignore_existing;
419 osm_mcast_work_obj_t *p_wobj;
421 OSM_LOG_ENTER(sm->p_log);
423 mlid_ho = cl_ntoh16(osm_mgrp_get_mlid(p_mgrp));
426 For Multicast Groups, we want not to count on previous
427 configurations - since we can easily generate a storm
430 ignore_existing = TRUE;
433 Subdivide the set of ports into non-overlapping subsets
434 that will be routed to other switches.
437 (osm_mcast_work_obj_t *) cl_qlist_remove_head(p_list)) !=
438 (osm_mcast_work_obj_t *) cl_qlist_end(p_list)) {
440 osm_switch_recommend_mcast_path(p_sw, p_wobj->p_port,
441 mlid_ho, ignore_existing);
443 if (port_num == OSM_NO_PATH) {
445 This typically occurs if the switch does not support
446 multicast and the multicast tree must branch at this
449 uint64_t node_guid_ho =
450 cl_ntoh64(osm_node_get_node_guid(p_sw->p_node));
451 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A03: "
452 "Error routing MLID 0x%X through switch 0x%"
454 "\t\t\t\tNo multicast paths from this switch for port "
455 "with LID %u\n", mlid_ho, node_guid_ho,
456 cl_ntoh16(osm_port_get_base_lid
459 __osm_mcast_work_obj_delete(p_wobj);
463 if (port_num > array_size) {
464 uint64_t node_guid_ho =
465 cl_ntoh64(osm_node_get_node_guid(p_sw->p_node));
466 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A04: "
467 "Error routing MLID 0x%X through switch 0x%"
469 "\t\t\t\tNo multicast paths from this switch to port "
470 "with LID %u\n", mlid_ho, node_guid_ho,
471 cl_ntoh16(osm_port_get_base_lid
474 __osm_mcast_work_obj_delete(p_wobj);
476 /* This is means OpenSM has a bug. */
481 cl_qlist_insert_tail(&list_array[port_num], &p_wobj->list_item);
484 OSM_LOG_EXIT(sm->p_log);
487 /**********************************************************************
488 **********************************************************************/
489 static void __osm_mcast_mgr_purge_list(osm_sm_t * sm, cl_qlist_t * const p_list)
491 osm_mcast_work_obj_t *p_wobj;
493 OSM_LOG_ENTER(sm->p_log);
495 while ((p_wobj = (osm_mcast_work_obj_t *) cl_qlist_remove_head(p_list))
496 != (osm_mcast_work_obj_t *) cl_qlist_end(p_list)) {
497 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A06: "
498 "Unable to route for port 0x%" PRIx64 "\n",
499 osm_port_get_guid(p_wobj->p_port));
500 __osm_mcast_work_obj_delete(p_wobj);
503 OSM_LOG_EXIT(sm->p_log);
506 /**********************************************************************
507 This is the recursive function to compute the paths in the spanning
508 tree that emanate from this switch. On input, the p_list contains
509 the group members that must be routed from this switch.
511 The function returns the newly created mtree node element.
512 **********************************************************************/
513 static osm_mtree_node_t *__osm_mcast_mgr_branch(osm_sm_t * sm,
514 osm_mgrp_t * const p_mgrp,
515 osm_switch_t * const p_sw,
516 cl_qlist_t * const p_list,
518 uint8_t const upstream_port,
519 uint8_t * const p_max_depth)
521 uint8_t max_children;
522 osm_mtree_node_t *p_mtn = NULL;
523 cl_qlist_t *list_array = NULL;
525 ib_net64_t node_guid;
526 uint64_t node_guid_ho;
527 osm_mcast_work_obj_t *p_wobj;
528 cl_qlist_t *p_port_list;
531 osm_mcast_tbl_t *p_tbl;
533 OSM_LOG_ENTER(sm->p_log);
537 CL_ASSERT(p_max_depth);
539 node_guid = osm_node_get_node_guid(p_sw->p_node);
540 node_guid_ho = cl_ntoh64(node_guid);
541 mlid_ho = cl_ntoh16(osm_mgrp_get_mlid(p_mgrp));
543 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
544 "Routing MLID 0x%X through switch 0x%" PRIx64
545 ", %u nodes at depth %u\n",
546 mlid_ho, node_guid_ho, cl_qlist_count(p_list), depth);
548 CL_ASSERT(cl_qlist_count(p_list) > 0);
553 OSM_LOG(sm->p_log, OSM_LOG_ERROR,
554 "Maximal hops number is reached for MLID 0x%x."
555 " Break processing.", mlid_ho);
556 __osm_mcast_mgr_purge_list(sm, p_list);
560 if (depth > *p_max_depth) {
561 CL_ASSERT(depth == *p_max_depth + 1);
562 *p_max_depth = depth;
565 if (osm_switch_supports_mcast(p_sw) == FALSE) {
567 This switch doesn't do multicast. Clean-up.
569 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A14: "
570 "Switch 0x%" PRIx64 " does not support multicast\n",
574 Deallocate all the work objects on this branch of the tree.
576 __osm_mcast_mgr_purge_list(sm, p_list);
580 p_mtn = osm_mtree_node_new(p_sw);
583 We are unable to continue routing down this
584 leg of the tree. Clean-up.
586 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A15: "
587 "Insufficient memory to build multicast tree\n");
590 Deallocate all the work objects on this branch of the tree.
592 __osm_mcast_mgr_purge_list(sm, p_list);
596 max_children = osm_mtree_node_get_max_children(p_mtn);
598 CL_ASSERT(max_children > 1);
601 Prepare an empty list for each port in the switch.
602 TO DO - this list array could probably be moved
603 inside the switch element to save on malloc thrashing.
605 list_array = malloc(sizeof(cl_qlist_t) * max_children);
606 if (list_array == NULL) {
607 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A16: "
608 "Unable to allocate list array\n");
609 __osm_mcast_mgr_purge_list(sm, p_list);
613 memset(list_array, 0, sizeof(cl_qlist_t) * max_children);
615 for (i = 0; i < max_children; i++)
616 cl_qlist_init(&list_array[i]);
618 __osm_mcast_mgr_subdivide(sm, p_mgrp, p_sw, p_list, list_array,
621 p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw);
624 Add the upstream port to the forwarding table unless
625 we're at the root of the spanning tree.
628 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
629 "Adding upstream port %u\n", upstream_port);
631 CL_ASSERT(upstream_port);
632 osm_mcast_tbl_set(p_tbl, mlid_ho, upstream_port);
636 For each port that was allocated some routes,
637 recurse into this function to continue building the tree
638 if the node on the other end of that port is another switch.
639 Otherwise, the node is an endpoint, and we've found a leaf
640 of the tree. Mark leaves with our special pointer value.
643 for (i = 0; i < max_children; i++) {
644 const osm_physp_t *p_physp;
645 const osm_physp_t *p_remote_physp;
647 const osm_node_t *p_remote_node;
649 p_port_list = &list_array[i];
651 count = cl_qlist_count(p_port_list);
654 There should be no children routed through the upstream port!
656 CL_ASSERT((upstream_port == 0) || (i != upstream_port) ||
657 ((i == upstream_port) && (count == 0)));
660 continue; /* No routes down this port. */
662 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
663 "Routing %zu destinations via switch port %u\n",
667 This port routes frames for this mcast group. Therefore,
668 set the appropriate bit in the multicast forwarding
669 table for this switch.
671 osm_mcast_tbl_set(p_tbl, mlid_ho, i);
673 /* This means we are adding the switch to the MC group.
674 We do not need to continue looking at the remote port, just
675 needed to add the port to the table */
676 CL_ASSERT(count == 1);
678 p_wobj = (osm_mcast_work_obj_t *)
679 cl_qlist_remove_head(p_port_list);
680 __osm_mcast_work_obj_delete(p_wobj);
684 p_node = p_sw->p_node;
685 p_remote_node = osm_node_get_remote_node(p_node, i, NULL);
689 if (osm_node_get_type(p_remote_node) == IB_NODE_TYPE_SWITCH) {
691 Acquire a pointer to the remote switch then recurse.
693 CL_ASSERT(p_remote_node->sw);
695 p_physp = osm_node_get_physp_ptr(p_node, i);
698 p_remote_physp = osm_physp_get_remote(p_physp);
699 CL_ASSERT(p_remote_physp);
701 p_mtn->child_array[i] =
702 __osm_mcast_mgr_branch(sm, p_mgrp,
705 osm_physp_get_port_num
710 The neighbor node is not a switch, so this
713 CL_ASSERT(count == 1);
715 p_mtn->child_array[i] = OSM_MTREE_LEAF;
716 p_wobj = (osm_mcast_work_obj_t *)
717 cl_qlist_remove_head(p_port_list);
719 CL_ASSERT(cl_is_qlist_empty(p_port_list));
721 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
722 "Found leaf for port 0x%016" PRIx64
723 " on switch port %u\n",
724 cl_ntoh64(osm_port_get_guid(p_wobj->p_port)),
727 __osm_mcast_work_obj_delete(p_wobj);
733 OSM_LOG_EXIT(sm->p_log);
737 /**********************************************************************
738 **********************************************************************/
739 static ib_api_status_t
740 __osm_mcast_mgr_build_spanning_tree(osm_sm_t * sm, osm_mgrp_t * const p_mgrp)
742 const cl_qmap_t *p_mcm_tbl;
743 const osm_port_t *p_port;
744 const osm_mcm_port_t *p_mcm_port;
746 cl_qlist_t port_list;
748 osm_mcast_work_obj_t *p_wobj;
749 ib_api_status_t status = IB_SUCCESS;
750 uint8_t max_depth = 0;
753 OSM_LOG_ENTER(sm->p_log);
755 cl_qlist_init(&port_list);
758 TO DO - for now, just blow away the old tree.
759 In the future we'll need to construct the tree based
760 on multicast forwarding table information if the user wants to
761 preserve existing multicast routes.
763 __osm_mcast_mgr_purge_tree(sm, p_mgrp);
765 p_mcm_tbl = &p_mgrp->mcm_port_tbl;
766 num_ports = cl_qmap_count(p_mcm_tbl);
767 if (num_ports == 0) {
768 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
769 "MLID 0x%X has no members - nothing to do\n",
770 cl_ntoh16(osm_mgrp_get_mlid(p_mgrp)));
775 This function builds the single spanning tree recursively.
776 At each stage, the ports to be reached are divided into
777 non-overlapping subsets of member ports that can be reached through
778 a given switch port. Construction then moves down each
779 branch, and the process starts again with each branch computing
780 for its own subset of the member ports.
782 The maximum recursion depth is at worst the maximum hop count in the
783 subnet, which is spec limited to 64.
787 Locate the switch around which to create the spanning
788 tree for this multicast group.
790 p_sw = __osm_mcast_mgr_find_root_switch(sm, p_mgrp);
792 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A08: "
793 "Unable to locate a suitable switch for group 0x%X\n",
794 cl_ntoh16(osm_mgrp_get_mlid(p_mgrp)));
800 Build the first "subset" containing all member ports.
802 for (p_mcm_port = (osm_mcm_port_t *) cl_qmap_head(p_mcm_tbl);
803 p_mcm_port != (osm_mcm_port_t *) cl_qmap_end(p_mcm_tbl);
805 (osm_mcm_port_t *) cl_qmap_next(&p_mcm_port->map_item)) {
807 Acquire the port object for this port guid, then create
808 the new worker object to build the list.
810 p_port = osm_get_port_by_guid(sm->p_subn,
811 ib_gid_get_guid(&p_mcm_port->
814 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A09: "
815 "No port object for port 0x%016" PRIx64 "\n",
816 cl_ntoh64(ib_gid_get_guid
817 (&p_mcm_port->port_gid)));
821 p_wobj = __osm_mcast_work_obj_new(p_port);
822 if (p_wobj == NULL) {
823 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A10: "
824 "Insufficient memory to route port 0x%016"
826 cl_ntoh64(osm_port_get_guid(p_port)));
830 cl_qlist_insert_tail(&port_list, &p_wobj->list_item);
833 count = cl_qlist_count(&port_list);
834 p_mgrp->p_root = __osm_mcast_mgr_branch(sm, p_mgrp, p_sw,
835 &port_list, 0, 0, &max_depth);
837 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
838 "Configured MLID 0x%X for %u ports, max tree depth = %u\n",
839 cl_ntoh16(osm_mgrp_get_mlid(p_mgrp)), count, max_depth);
842 OSM_LOG_EXIT(sm->p_log);
848 /**********************************************************************
849 **********************************************************************/
851 osm_mcast_mgr_set_table(osm_sm_t * sm,
852 IN const osm_mgrp_t * const p_mgrp,
853 IN const osm_mtree_node_t * const p_mtn)
856 uint8_t max_children;
857 osm_mtree_node_t *p_child_mtn;
859 osm_mcast_tbl_t *p_tbl;
862 OSM_LOG_ENTER(sm->p_log);
864 mlid_ho = cl_ntoh16(osm_mgrp_get_mlid(p_mgrp));
865 p_sw = osm_mtree_node_get_switch_ptr(p_mtn);
869 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
870 "Configuring MLID 0x%X on switch 0x%" PRIx64 "\n",
871 mlid_ho, osm_node_get_node_guid(p_sw->p_node));
874 For every child of this tree node, set the corresponding
875 bit in the switch's mcast table.
877 p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw);
878 max_children = osm_mtree_node_get_max_children(p_mtn);
880 CL_ASSERT(max_children <= osm_switch_get_num_ports(p_sw));
882 osm_mcast_tbl_clear_mlid(p_tbl, mlid_ho);
884 for (i = 0; i < max_children; i++) {
885 p_child_mtn = osm_mtree_node_get_child(p_mtn, i);
886 if (p_child_mtn == NULL)
889 osm_mcast_tbl_set(p_tbl, mlid_ho, i);
892 OSM_LOG_EXIT(sm->p_log);
896 /**********************************************************************
897 **********************************************************************/
898 static void __osm_mcast_mgr_clear(osm_sm_t * sm, IN osm_mgrp_t * const p_mgrp)
902 osm_mcast_tbl_t *p_mcast_tbl;
904 OSM_LOG_ENTER(sm->p_log);
907 Walk the switches and clear the routing entries for
910 p_sw_tbl = &sm->p_subn->sw_guid_tbl;
911 p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl);
912 while (p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl)) {
913 p_mcast_tbl = osm_switch_get_mcast_tbl_ptr(p_sw);
914 osm_mcast_tbl_clear_mlid(p_mcast_tbl, cl_ntoh16(p_mgrp->mlid));
915 p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item);
918 OSM_LOG_EXIT(sm->p_log);
922 /* TO DO - make this real -- at least update spanning tree */
923 /**********************************************************************
924 Lock must be held on entry.
925 **********************************************************************/
927 osm_mcast_mgr_process_single(osm_sm_t * sm,
928 IN ib_net16_t const mlid,
929 IN ib_net64_t const port_guid,
930 IN uint8_t const join_state)
936 osm_physp_t *p_physp;
937 osm_physp_t *p_remote_physp;
938 osm_node_t *p_remote_node;
939 osm_mcast_tbl_t *p_mcast_tbl;
940 ib_api_status_t status = IB_SUCCESS;
942 OSM_LOG_ENTER(sm->p_log);
945 CL_ASSERT(port_guid);
947 mlid_ho = cl_ntoh16(mlid);
949 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
950 "Attempting to add port 0x%" PRIx64 " to MLID 0x%X, "
951 "\n\t\t\t\tjoin state = 0x%X\n",
952 cl_ntoh64(port_guid), mlid_ho, join_state);
955 Acquire the Port object.
957 p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
959 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A01: "
960 "Unable to acquire port object for 0x%" PRIx64 "\n",
961 cl_ntoh64(port_guid));
966 p_physp = p_port->p_physp;
967 if (p_physp == NULL) {
968 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A05: "
969 "Unable to acquire phsyical port object for 0x%" PRIx64
970 "\n", cl_ntoh64(port_guid));
975 p_remote_physp = osm_physp_get_remote(p_physp);
976 if (p_remote_physp == NULL) {
977 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A11: "
978 "Unable to acquire remote phsyical port object "
979 "for 0x%" PRIx64 "\n", cl_ntoh64(port_guid));
984 p_remote_node = osm_physp_get_node_ptr(p_remote_physp);
986 CL_ASSERT(p_remote_node);
988 sw_guid = osm_node_get_node_guid(p_remote_node);
990 if (osm_node_get_type(p_remote_node) != IB_NODE_TYPE_SWITCH) {
991 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A22: "
992 "Remote node not a switch node 0x%" PRIx64 "\n",
998 if (!p_remote_node->sw) {
999 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A12: "
1000 "No switch object 0x%" PRIx64 "\n", cl_ntoh64(sw_guid));
1005 if (osm_switch_is_in_mcast_tree(p_remote_node->sw, mlid_ho)) {
1007 We're in luck. The switch attached to this port
1008 is already in the multicast group, so we can just
1009 add the specified port as a new leaf of the tree.
1011 if (join_state & (IB_JOIN_STATE_FULL | IB_JOIN_STATE_NON)) {
1013 This node wants to receive multicast frames.
1014 Get the switch port number to which the new member port
1015 is attached, then configure this single mcast table.
1017 port_num = osm_physp_get_port_num(p_remote_physp);
1018 CL_ASSERT(port_num);
1021 osm_switch_get_mcast_tbl_ptr(p_remote_node->sw);
1022 osm_mcast_tbl_set(p_mcast_tbl, mlid_ho, port_num);
1024 if (join_state & IB_JOIN_STATE_SEND_ONLY)
1025 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
1026 "Success. Nothing to do for send"
1029 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A13: "
1030 "Unknown join state 0x%X\n",
1037 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Unable to add port\n");
1040 OSM_LOG_EXIT(sm->p_log);
1045 /**********************************************************************
1046 lock must already be held on entry
1047 **********************************************************************/
1048 static ib_api_status_t
1049 osm_mcast_mgr_process_tree(osm_sm_t * sm,
1050 IN osm_mgrp_t * const p_mgrp,
1051 IN osm_mcast_req_type_t req_type,
1052 ib_net64_t port_guid)
1054 ib_api_status_t status = IB_SUCCESS;
1057 OSM_LOG_ENTER(sm->p_log);
1059 mlid = osm_mgrp_get_mlid(p_mgrp);
1061 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
1062 "Processing multicast group 0x%X\n", cl_ntoh16(mlid));
1065 If there are no switches in the subnet, then we have nothing to do.
1067 if (cl_qmap_count(&sm->p_subn->sw_guid_tbl) == 0) {
1068 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
1069 "No switches in subnet. Nothing to do\n");
1074 Clear the multicast tables to start clean, then build
1075 the spanning tree which sets the mcast table bits for each
1078 __osm_mcast_mgr_clear(sm, p_mgrp);
1080 if (!p_mgrp->full_members)
1083 status = __osm_mcast_mgr_build_spanning_tree(sm, p_mgrp);
1084 if (status != IB_SUCCESS) {
1085 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A17: "
1086 "Unable to create spanning tree (%s)\n",
1087 ib_get_err_str(status));
1092 OSM_LOG_EXIT(sm->p_log);
1096 /**********************************************************************
1097 Process the entire group.
1098 NOTE : The lock should be held externally!
1099 **********************************************************************/
1100 static ib_api_status_t
1101 mcast_mgr_process_mgrp(osm_sm_t * sm,
1102 IN osm_mgrp_t * const p_mgrp,
1103 IN osm_mcast_req_type_t req_type,
1104 IN ib_net64_t port_guid)
1106 ib_api_status_t status;
1108 OSM_LOG_ENTER(sm->p_log);
1110 status = osm_mcast_mgr_process_tree(sm, p_mgrp, req_type, port_guid);
1111 if (status != IB_SUCCESS) {
1112 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A19: "
1113 "Unable to create spanning tree (%s)\n",
1114 ib_get_err_str(status));
1117 p_mgrp->last_tree_id = p_mgrp->last_change_id;
1119 /* remove MCGRP if it is marked for deletion */
1120 if (p_mgrp->to_be_deleted) {
1121 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
1122 "Destroying mgrp with lid:0x%x\n",
1123 cl_ntoh16(p_mgrp->mlid));
1124 sm->p_subn->mgroups[cl_ntoh16(p_mgrp->mlid) - IB_LID_MCAST_START_HO] = NULL;
1125 osm_mgrp_delete(p_mgrp);
1129 OSM_LOG_EXIT(sm->p_log);
1133 /**********************************************************************
1134 **********************************************************************/
1135 osm_signal_t osm_mcast_mgr_process(osm_sm_t * sm)
1137 osm_signal_t signal;
1139 cl_qmap_t *p_sw_tbl;
1140 cl_qlist_t *p_list = &sm->mgrp_list;
1142 boolean_t pending_transactions = FALSE;
1145 OSM_LOG_ENTER(sm->p_log);
1147 p_sw_tbl = &sm->p_subn->sw_guid_tbl;
1149 While holding the lock, iterate over all the established
1150 multicast groups, servicing each in turn.
1152 Then, download the multicast tables to the switches.
1154 CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
1156 for (i = 0; i <= sm->p_subn->max_mcast_lid_ho - IB_LID_MCAST_START_HO;
1159 We reached here due to some change that caused a heavy sweep
1160 of the subnet. Not due to a specific multicast request.
1161 So the request type is subnet_change and the port guid is 0.
1163 p_mgrp = sm->p_subn->mgroups[i];
1165 mcast_mgr_process_mgrp(sm, p_mgrp,
1166 OSM_MCAST_REQ_TYPE_SUBNET_CHANGE,
1171 Walk the switches and download the tables for each.
1173 p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl);
1174 while (p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl)) {
1175 signal = __osm_mcast_mgr_set_tbl(sm, p_sw);
1176 if (signal == OSM_SIGNAL_DONE_PENDING)
1177 pending_transactions = TRUE;
1178 p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item);
1181 while (!cl_is_qlist_empty(p_list)) {
1182 cl_list_item_t *p = cl_qlist_remove_head(p_list);
1186 CL_PLOCK_RELEASE(sm->p_lock);
1188 OSM_LOG_EXIT(sm->p_log);
1190 if (pending_transactions == TRUE)
1191 return (OSM_SIGNAL_DONE_PENDING);
1193 return (OSM_SIGNAL_DONE);
1196 /**********************************************************************
1197 This is the function that is invoked during idle time to handle the
1198 process request for mcast groups where join/leave/delete was required.
1199 **********************************************************************/
1200 osm_signal_t osm_mcast_mgr_process_mgroups(osm_sm_t * sm)
1202 cl_qlist_t *p_list = &sm->mgrp_list;
1204 cl_qmap_t *p_sw_tbl;
1207 osm_signal_t ret, signal = OSM_SIGNAL_DONE;
1208 osm_mcast_mgr_ctxt_t *ctx;
1209 osm_mcast_req_type_t req_type;
1210 ib_net64_t port_guid;
1212 OSM_LOG_ENTER(sm->p_log);
1214 /* we need a lock to make sure the p_mgrp is not change other ways */
1215 CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
1217 while (!cl_is_qlist_empty(p_list)) {
1218 ctx = (osm_mcast_mgr_ctxt_t *) cl_qlist_remove_head(p_list);
1219 req_type = ctx->req_type;
1220 port_guid = ctx->port_guid;
1222 /* nice copy no warning on size diff */
1223 memcpy(&mlid, &ctx->mlid, sizeof(mlid));
1225 /* we can destroy the context now */
1228 /* since we delayed the execution we prefer to pass the
1229 mlid as the mgrp identifier and then find it or abort */
1230 p_mgrp = osm_get_mgrp_by_mlid(sm->p_subn, mlid);
1234 /* if there was no change from the last time
1235 * we processed the group we can skip doing anything
1237 if (p_mgrp->last_change_id == p_mgrp->last_tree_id) {
1238 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
1239 "Skip processing mgrp with lid:0x%X change id:%u\n",
1240 cl_ntoh16(mlid), p_mgrp->last_change_id);
1244 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
1245 "Processing mgrp with lid:0x%X change id:%u\n",
1246 cl_ntoh16(mlid), p_mgrp->last_change_id);
1247 mcast_mgr_process_mgrp(sm, p_mgrp, req_type, port_guid);
1251 Walk the switches and download the tables for each.
1253 p_sw_tbl = &sm->p_subn->sw_guid_tbl;
1254 p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl);
1255 while (p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl)) {
1256 ret = __osm_mcast_mgr_set_tbl(sm, p_sw);
1257 if (ret == OSM_SIGNAL_DONE_PENDING)
1259 p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item);
1262 osm_dump_mcast_routes(sm->p_subn->p_osm);
1264 CL_PLOCK_RELEASE(sm->p_lock);
1265 OSM_LOG_EXIT(sm->p_log);