3 # Copyright (c) 2008 Voltaire, Inc. All rights reserved.
4 # Copyright (c) 2006 Mellanox Technologies. All rights reserved.
6 # This Software is licensed under one of the following licenses:
8 # 1) under the terms of the "Common Public License 1.0" a copy of which is
9 # available from the Open Source Initiative, see
10 # http://www.opensource.org/licenses/cpl.php.
12 # 2) under the terms of the "The BSD License" a copy of which is
13 # available from the Open Source Initiative, see
14 # http://www.opensource.org/licenses/bsd-license.php.
16 # 3) under the terms of the "GNU General Public License (GPL) Version 2" a
17 # copy of which is available from the Open Source Initiative, see
18 # http://www.opensource.org/licenses/gpl-license.php.
20 # Licensee has the right to choose one of the above licenses.
22 # Redistributions of source code must retain the above copyright
23 # notice and one of the license notices.
25 # Redistributions in binary form must reproduce both the above copyright
26 # notice, one of the license notices in the documentation
27 # and/or other materials provided with the distribution.
31 # OpenSM was found to have the following problem
32 # when handover is performed:
33 # If some of the cluster nodes are rebooted during the handover they lose their LID assignment.
34 # The reason is that the standby SM does not obey its own GUID to LID table
35 # and simply uses the discovered LIDs. If some nodes are not available to it,
36 # their previous LID assignment is lost forever.
38 # The idea is to use an external daemon that will distribute
39 # the semi-static LID assignment table from the master SM to all standby SMs.
40 # A standby SM that becomes the master needs to obey the copied semi-static LID assignment table.
# Daemon configuration. Every ${NAME:-default} assignment below keeps a value
# that is already set and falls back to the default otherwise.
# NOTE(review): the @exec_prefix@ / @sysconfdir@ tokens look like placeholders
# substituted when the script is generated - TODO confirm.
43 exec_prefix=@exec_prefix@
# Path of the optional site configuration file.
45 CONFIG=@sysconfdir@/sysconfig/opensm
# NOTE(review): the body of this `if` and its closing `fi` are not present in
# the lines shown here; presumably the file is sourced when it exists -
# confirm against the complete script.
46 if [ -f $CONFIG ]; then
# Debug switch: the script prints progress messages only when this equals 1.
50 SLDD_DEBUG=${SLDD_DEBUG:-0}
# Local GUID-to-LID cache file, the directory that holds it, and a ".tmp"
# companion file whose modification time is compared with the cache file's.
52 CACHE_FILE=${CACHE_FILE:-/var/cache/opensm/guid2lid}
53 CACHE_DIR=$(dirname ${CACHE_FILE})
54 tmp_cache=${CACHE_FILE}.tmp
# Remote copy / remote shell commands and the interface listing command.
# They are expanded unquoted where used, so a value may carry options.
# NOTE(review): $PING is expanded further down but no assignment to it is
# visible in these lines - confirm it is defined.
58 RCP=${RCP:-/usr/bin/scp}
59 RSH=${RSH:-/usr/bin/ssh}
60 IFCONFIG=${IFCONFIG:-'/sbin/ifconfig -a'}
# Interval, in seconds, reported by the "Sleeping ..." debug message.
63 RESCAN_TIME=${RESCAN_TIME:-60}
# OSM_HOSTS is the whitespace-separated list of OpenSM servers.
# NOTE(review): no statement follows the message inside this `if` and the
# closing `fi` is missing in the visible lines; presumably the script stops
# here when the list is empty - confirm.
65 if [ -z "${OSM_HOSTS}" ]; then
66 [ $SLDD_DEBUG -eq 1 ] &&
67 echo "No OpenSM servers (OSM_HOSTS) configured for the IB subnet."
# Split the list into an array only to count its entries.
72 declare -a arr_OSM_HOSTS
73 arr_OSM_HOSTS=(${OSM_HOSTS})
75 num_of_osm_hosts=${#arr_OSM_HOSTS[@]}
# With exactly one server configured the messages say nothing needs doing.
# NOTE(review): as above, whatever followed the messages (and the `fi`) is
# not visible here - confirm.
77 if [ ${num_of_osm_hosts} -eq 1 ]; then
78 [ $SLDD_DEBUG -eq 1 ] &&
79 echo "One OpenSM server configured in the IB subnet." &&
80 echo "Nothing to be done for SLDD"
# Signal 15 is SIGTERM. The handler name is resolved when the signal arrives,
# so registering it ahead of the function definition is fine.
trap 'trap_handler' 15

# SIGTERM handler: report the shutdown and stop the daemon.
# Outputs: one syslog record through logger(1); -i adds the process id.
# Returns: never returns to the caller - the script exits with status 0.
trap_handler()
{
	logger -i "SLDD: Exiting."
	# Without an explicit exit the interrupted script would simply resume.
	exit 0
}
# Check whether a host answers the reachability probe.
# Globals:   PING (read) - probe command and its options
# Arguments: $1 - host name or address to probe
# Outputs:   none; everything the probe prints is discarded
# Returns:   exit status of the probe command (0 when it succeeded)
is_alive()
{
	# $PING stays unquoted on purpose so that options stored in it are split
	# into separate words; the host is quoted so it is always passed as
	# exactly one argument, even when empty.
	$PING "$1" > /dev/null 2>&1
}
# Check whether an address belongs to one of this machine's interfaces.
# Globals:   IFCONFIG (read) - command (with options) that lists interfaces
# Arguments: $1 - address to look for in the interface listing
# Outputs:   none; all output is discarded
# Returns:   0 when the address appears as a whole word in the listing,
#            non-zero otherwise (including for an empty argument)
is_local()
{
	# An empty pattern must never count as a local address.
	[ -n "$1" ] || return 1

	# $IFCONFIG stays unquoted on purpose so its options are split into words.
	# -F takes the address literally (a "." must not match any character),
	# -w requires a whole-word match, "--" protects a leading dash.
	$IFCONFIG | grep -wF -- "$1" > /dev/null 2>&1
}
# Push the local cache file to every other OpenSM host.
# Globals:   CACHE_FILE, CACHE_DIR, OSM_HOSTS, local_host, SLDD_DEBUG,
#            RSH, RCP (all read)
# Arguments: none
# Outputs:   progress messages on stdout when SLDD_DEBUG is 1; syslog records
#            through logger(1)
# Returns:   1 when the working copy of the cache cannot be created,
#            0 otherwise (per-host failures are logged, not returned)
update_remote_cache()
{
	local host stat

	# Work from a copy (.upd) of the cache file.
	/bin/rm -f "${CACHE_FILE}.upd"
	if ! /bin/cp -a "${CACHE_FILE}" "${CACHE_FILE}.upd"; then
		logger -i "SLDD: Failed to snapshot ${CACHE_FILE}"
		return 1
	fi

	[ "${SLDD_DEBUG:-0}" -eq 1 ] &&
	echo "Updating remote cache file"

	# OSM_HOSTS is a whitespace-separated list; word splitting is intended.
	for host in ${OSM_HOSTS}
	do
		# Skip local host update
		if [ "${host}" == "${local_host}" ]; then
			continue
		fi

		if is_alive "$host"; then
			# Prepare the remote side and read back the status it prints.
			# $RSH is unquoted on purpose so it may carry options.
			stat=$($RSH "$host" "/bin/mkdir -p ${CACHE_DIR} > /dev/null 2>&1; /bin/rm -f ${CACHE_FILE}.${local_host} > /dev/null 2>&1; echo \$?" | tr -d '[:space:]')
			if [ "X${stat}" == "X0" ]; then
				[ "${SLDD_DEBUG:-0}" -eq 1 ] &&
				echo "Updating $host"
				logger -i "SLDD: updating $host with ${CACHE_FILE}"
				# Record the content under the peer's name only when the
				# transfer really succeeded; otherwise the local record
				# would claim the peer holds data it never received.
				if $RCP "${CACHE_FILE}.upd" "${host}:${CACHE_FILE}.${local_host}"; then
					/bin/cp "${CACHE_FILE}.upd" "${CACHE_FILE}.${host}"
				else
					logger -i "SLDD: Failed to copy ${CACHE_FILE} to $host"
				fi
			else
				[ "${SLDD_DEBUG:-0}" -eq 1 ] &&
				echo "$RSH to $host failed."
				logger -i "SLDD: Failed to update $host with ${CACHE_FILE}. $RSH without password should be enabled"
			fi
		else
			[ "${SLDD_DEBUG:-0}" -eq 1 ] &&
			echo "$host is down."
		fi
	done

	return 0
}
# Print the most recently modified remote cache file.
# Globals:   CACHE_FILE (read)
# Arguments: none
# Outputs:   the path on stdout without a trailing newline; nothing when no
#            file matches
get_latest_remote_cache()
{
	# Find most updated remote cache file (the suffix should be like ip address: *.*.*.*)
	# Only the prefix is quoted: the ".*.*" part must stay a glob, while a
	# cache path containing whitespace must not be split into several words.
	printf '%s' "$(/bin/ls -1t -- "${CACHE_FILE}".*.* 2> /dev/null | head -1)"
}
# Print the largest (by size) remote cache file.
# Globals:   CACHE_FILE (read)
# Arguments: none
# Outputs:   the path on stdout without a trailing newline; nothing when no
#            file matches
get_largest_remote_cache()
{
	# Find largest (size) remote cache file (the suffix should be like ip address: *.*.*.*)
	# Only the prefix is quoted: the ".*.*" part must stay a glob, while a
	# cache path containing whitespace must not be split into several words.
	printf '%s' "$(/bin/ls -1S -- "${CACHE_FILE}".*.* 2> /dev/null | head -1)"
}
# Replace the local cache file with the file named by ${largest_remote_cache}.
# NOTE(review): these four statements read like the body of a helper
# function, but no function header or braces are present in the visible
# lines - confirm the name and boundaries against the complete script.
# Drop the previous backup, then keep the current cache as ".old".
156 /bin/rm -f ${CACHE_FILE}.old
157 /bin/mv ${CACHE_FILE} ${CACHE_FILE}.old
# Install the remote copy as the new local cache.
158 /bin/cp ${largest_remote_cache} ${CACHE_FILE}
# Refresh the ".tmp" file's timestamp; the polling code compares the
# modification order of ${CACHE_FILE} and ${CACHE_FILE}.tmp.
159 touch ${CACHE_FILE}.tmp
162 # Find local host in the osm hosts list
# Each whitespace-separated entry of OSM_HOSTS is tested with is_local.
# NOTE(review): the `do`, the body of the `if`, and the closing `fi`/`done`
# are not present in the visible lines. update_remote_cache compares hosts
# against ${local_host}, so presumably that variable is assigned here -
# confirm against the complete script.
164 for host in ${OSM_HOSTS}
166 if is_local $host; then
171 # Get cache file info
# Integer-typed size trackers: current local size, last seen local size,
# and the size of the largest remote cache file.
172 declare -i new_size=0
173 declare -i last_size=0
174 declare -i largest_remote_cache_size=0
# When the cache file exists, remember its size as reported by `du -b`
# (first output column, whitespace stripped).
176 if [ -e ${CACHE_FILE} ]; then
177 last_size=$(du -b ${CACHE_FILE} | awk '{print$1}' | tr -d '[:space:]')
# Creates the cache file and its ".tmp" companion (or updates their times).
# NOTE(review): no `else`/`fi` is visible around this line; presumably it is
# the branch taken when the cache file does not exist yet - confirm.
179 touch ${CACHE_FILE} ${CACHE_FILE}.tmp
# Disabled code: an initial push of a non-empty cache to the other hosts.
182 # if [ ${last_size} -gt 0 ]; then
183 # # First time update
184 # update_remote_cache
# One synchronisation pass: compare the local cache file with the largest
# remote cache file and decide which side needs updating.
# NOTE(review): several structural lines (`else`, `fi`, any enclosing loop
# header/footer and the calls that perform the actual update) are not present
# in the visible lines - confirm against the complete script before editing.
# Branch 1: the local cache file exists and is not empty.
189 if [ -s "${CACHE_FILE}" ]; then
190 new_size=$(du -b ${CACHE_FILE} | awk '{print$1}' | tr -d '[:space:]')
191 # Check if local cache file grew from its last version or the time stamp changed
# NOTE(review): the two bracket tests below sit on consecutive lines with no
# `||` between them. In that form they are a plain command list and `if`
# acts on the status of the LAST test only, so the size comparison result is
# discarded and only the timestamp test decides. The comment above says
# "or" - joining them with `||` looks intended; confirm and fix.
192 if [ ${new_size} -gt ${last_size} ]
# True when the newest of the two files is not the ".tmp" companion.
193 [ "$(/bin/ls -1t ${CACHE_FILE} ${CACHE_FILE}.tmp 2> /dev/null | head -1)" != "${CACHE_FILE}.tmp" ]; then
# Size of the largest remote cache file, or 0 when none exists / it is empty.
194 largest_remote_cache=$(get_largest_remote_cache)
195 if [[ -n "${largest_remote_cache}" && -s "${largest_remote_cache}" ]]; then
196 largest_remote_cache_size=$(du -b ${largest_remote_cache} 2> /dev/null | awk '{print$1}' | tr -d '[:space:]')
198 largest_remote_cache_size=0
201 # Check if local cache file larger than remote chache file
202 if [ ${new_size} -gt ${largest_remote_cache_size} ]; then
203 [ $SLDD_DEBUG -eq 1 ] &&
204 echo "Local cache file larger then remote. Update remote cache files"
# NOTE(review): only the bookkeeping is visible here; the statement that
# pushes the cache to the other hosts is presumably on a missing line.
205 last_size=${new_size}
# Re-evaluate the largest remote cache file for the opposite direction.
211 largest_remote_cache=$(get_largest_remote_cache)
212 if [[ -n "${largest_remote_cache}" && -s "${largest_remote_cache}" ]]; then
213 largest_remote_cache_size=$(du -b ${largest_remote_cache} 2> /dev/null | awk '{print$1}' | tr -d '[:space:]')
215 largest_remote_cache_size=0
218 # Update local cache file from remote
219 if [ ${largest_remote_cache_size} -gt ${new_size} ]; then
220 [ $SLDD_DEBUG -eq 1 ] &&
221 echo "Local cache file shorter then remote. Use ${largest_remote_cache}"
222 logger -i "SLDD: updating local cache file with ${largest_remote_cache}"
# NOTE(review): the statement that actually replaces the local cache is not
# visible between the log call above and the bookkeeping below - confirm.
224 last_size=${largest_remote_cache_size}
# Branch 2: the local cache file is missing or has zero size.
227 else # The local cache file is empty
228 [ $SLDD_DEBUG -eq 1 ] &&
229 echo "${CACHE_FILE} is empty"
231 largest_remote_cache=$(get_largest_remote_cache)
232 if [[ -n "${largest_remote_cache}" && -s "${largest_remote_cache}" ]]; then
233 # Copy it to the current cache
234 [ $SLDD_DEBUG -eq 1 ] &&
235 echo "Local cache file is empty. Use ${largest_remote_cache}"
236 logger -i "SLDD: updating local cache file with ${largest_remote_cache}"
# End of the pass.
# NOTE(review): only the debug message is visible; the wait of
# ${RESCAN_TIME} seconds itself is presumably on a missing line - confirm.
242 [ $SLDD_DEBUG -eq 1 ] &&
243 echo "Sleeping ${RESCAN_TIME} seconds."