]> CyberLeo.Net >> Repos - FreeBSD/releng/10.0.git/blob - contrib/ofed/management/opensm/opensm/osm_perfmgr.c
- Copy stable/10 (r259064) to releng/10.0 as part of the
[FreeBSD/releng/10.0.git] / contrib / ofed / management / opensm / opensm / osm_perfmgr.c
1 /*
2  * Copyright (c) 2007 The Regents of the University of California.
3  * Copyright (c) 2007-2008 Voltaire, Inc. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  *
33  */
34
35 /*
36  * Abstract:
37  *    Implementation of osm_perfmgr_t.
38  * This object implements an IBA performance manager.
39  *
40  * Author:
41  *    Ira Weiny, LLNL
42  */
43
44 #if HAVE_CONFIG_H
45 #  include <config.h>
46 #endif                          /* HAVE_CONFIG_H */
47
48 #ifdef ENABLE_OSM_PERF_MGR
49
50 #include <stdlib.h>
51 #include <stdint.h>
52 #include <string.h>
53 #include <poll.h>
54 #include <errno.h>
55 #include <sys/time.h>
56 #include <netinet/in.h>
57 #include <float.h>
58 #include <arpa/inet.h>
59 #include <iba/ib_types.h>
60 #include <complib/cl_debug.h>
61 #include <complib/cl_thread.h>
62 #include <vendor/osm_vendor_api.h>
63 #include <opensm/osm_perfmgr.h>
64 #include <opensm/osm_log.h>
65 #include <opensm/osm_node.h>
66 #include <opensm/osm_opensm.h>
67
68 #define OSM_PERFMGR_INITIAL_TID_VALUE 0xcafe
69
70 #if ENABLE_OSM_PERF_MGR_PROFILE
/*
 * Running statistics on PerfMgr MAD timings, in microseconds.
 * Samples are folded in by update_mad_stats() and the whole record is
 * reset by perfmgr_clear_mad_stats().
 *
 * NOTE(review): slowest_us starts at DBL_MIN, which is the smallest
 * positive double rather than the most negative one; this matches
 * perfmgr_clear_mad_stats() and is kept as is.
 */
struct {
        double fastest_us;      /* smallest sample recorded so far */
        double slowest_us;      /* largest sample recorded so far */
        double avg_us;          /* running mean of all recorded samples */
        uint64_t num;           /* number of samples behind avg_us */
} perfmgr_mad_stats = {
        .fastest_us = DBL_MAX,
        .slowest_us = DBL_MIN,
        .avg_us = 0,
        .num = 0
};
82
83 /* diff must be something which can fit in a susecond_t */
84 static inline void update_mad_stats(struct timeval *diff)
85 {
86         double new = (diff->tv_sec * 1000000) + diff->tv_usec;
87         if (new < perfmgr_mad_stats.fastest_us)
88                 perfmgr_mad_stats.fastest_us = new;
89         if (new > perfmgr_mad_stats.slowest_us)
90                 perfmgr_mad_stats.slowest_us = new;
91
92         perfmgr_mad_stats.avg_us =
93             ((perfmgr_mad_stats.avg_us * perfmgr_mad_stats.num) + new)
94             / (perfmgr_mad_stats.num + 1);
95         perfmgr_mad_stats.num++;
96 }
97
98 static inline void perfmgr_clear_mad_stats(void)
99 {
100         perfmgr_mad_stats.fastest_us = DBL_MAX;
101         perfmgr_mad_stats.slowest_us = DBL_MIN;
102         perfmgr_mad_stats.avg_us = 0;
103         perfmgr_mad_stats.num = 0;
104 }
105
/*
 * Compute *diff = *after - *before.
 *
 * The result is built in a local and stored at the end, so "after" and
 * "diff" may point at the same struct.  When the microsecond part of
 * the difference is negative, one second is borrowed.
 */
static inline void diff_time(struct timeval *before,
                             struct timeval *after, struct timeval *diff)
{
        struct timeval delta;

        delta.tv_sec = after->tv_sec - before->tv_sec;
        delta.tv_usec = after->tv_usec - before->tv_usec;

        if (delta.tv_usec < 0) {
                /* borrow one second to keep tv_usec non-negative */
                delta.tv_sec -= 1;
                delta.tv_usec += 1000000;
        }

        diff->tv_sec = delta.tv_sec;
        diff->tv_usec = delta.tv_usec;
}
118
119 #endif
120
121 extern int wait_for_pending_transactions(osm_stats_t * stats);
122
123 /**********************************************************************
124  * Internal helper functions.
125  **********************************************************************/
126 static inline void __init_monitored_nodes(osm_perfmgr_t * pm)
127 {
128         cl_qmap_init(&pm->monitored_map);
129         pm->remove_list = NULL;
130         cl_event_construct(&pm->sig_query);
131         cl_event_init(&pm->sig_query, FALSE);
132 }
133
134 static inline void
135 __mark_for_removal(osm_perfmgr_t * pm, __monitored_node_t * node)
136 {
137         if (pm->remove_list) {
138                 node->next = pm->remove_list;
139                 pm->remove_list = node;
140         } else {
141                 node->next = NULL;
142                 pm->remove_list = node;
143         }
144 }
145
146 static inline void __remove_marked_nodes(osm_perfmgr_t * pm)
147 {
148         while (pm->remove_list) {
149                 __monitored_node_t *next = pm->remove_list->next;
150
151                 cl_qmap_remove_item(&(pm->monitored_map),
152                                     (cl_map_item_t *) (pm->remove_list));
153
154                 if (pm->remove_list->name)
155                         free(pm->remove_list->name);
156                 free(pm->remove_list);
157                 pm->remove_list = next;
158         }
159 }
160
161 static inline void __decrement_outstanding_queries(osm_perfmgr_t * pm)
162 {
163         cl_atomic_dec(&(pm->outstanding_queries));
164         cl_event_signal(&(pm->sig_query));
165 }
166
167 /**********************************************************************
168  * Receive the MAD from the vendor layer and post it for processing by
169  * the dispatcher.
170  **********************************************************************/
171 static void
172 osm_perfmgr_mad_recv_callback(osm_madw_t * p_madw, void *bind_context,
173                               osm_madw_t * p_req_madw)
174 {
175         osm_perfmgr_t *pm = (osm_perfmgr_t *) bind_context;
176
177         OSM_LOG_ENTER(pm->log);
178
179         osm_madw_copy_context(p_madw, p_req_madw);
180         osm_mad_pool_put(pm->mad_pool, p_req_madw);
181
182         __decrement_outstanding_queries(pm);
183
184         /* post this message for later processing. */
185         if (cl_disp_post(pm->pc_disp_h, OSM_MSG_MAD_PORT_COUNTERS,
186                          (void *)p_madw, NULL, NULL) != CL_SUCCESS) {
187                 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C01: "
188                         "PerfMgr Dispatcher post failed\n");
189                 osm_mad_pool_put(pm->mad_pool, p_madw);
190         }
191         OSM_LOG_EXIT(pm->log);
192 }
193
194 /**********************************************************************
195  * Process MAD send errors.
196  **********************************************************************/
197 static void
198 osm_perfmgr_mad_send_err_callback(void *bind_context, osm_madw_t * p_madw)
199 {
200         osm_perfmgr_t *pm = (osm_perfmgr_t *) bind_context;
201         osm_madw_context_t *context = &(p_madw->context);
202         uint64_t node_guid = context->perfmgr_context.node_guid;
203         uint8_t port = context->perfmgr_context.port;
204         cl_map_item_t *p_node;
205         __monitored_node_t *p_mon_node;
206
207         OSM_LOG_ENTER(pm->log);
208
209         /* go ahead and get the monitored node struct to have the printable
210          * name if needed in messages
211          */
212         if ((p_node = cl_qmap_get(&(pm->monitored_map), node_guid)) ==
213             cl_qmap_end(&(pm->monitored_map))) {
214                 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C15: GUID 0x%016"
215                         PRIx64 " not found in monitored map\n",
216                         node_guid);
217                 goto Exit;
218         }
219         p_mon_node = (__monitored_node_t *) p_node;
220
221         OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C02: %s (0x%" PRIx64
222                 ") port %u\n", p_mon_node->name, p_mon_node->guid, port);
223
224         if (pm->subn->opt.perfmgr_redir && p_madw->status == IB_TIMEOUT) {
225                 /* First, find the node in the monitored map */
226                 cl_plock_acquire(pm->lock);
227                 /* Now, validate port number */
228                 if (port > p_mon_node->redir_tbl_size) {
229                         cl_plock_release(pm->lock);
230                         OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C16: "
231                                 "Invalid port num %u for %s (GUID 0x%016"
232                                 PRIx64 ") num ports %u\n", port, p_mon_node->name,
233                                 p_mon_node->guid, p_mon_node->redir_tbl_size);
234                         goto Exit;
235                 }
236                 /* Clear redirection info */
237                 p_mon_node->redir_port[port].redir_lid = 0;
238                 p_mon_node->redir_port[port].redir_qp = 0;
239                 cl_plock_release(pm->lock);
240         }
241
242 Exit:
243         osm_mad_pool_put(pm->mad_pool, p_madw);
244
245         __decrement_outstanding_queries(pm);
246
247         OSM_LOG_EXIT(pm->log);
248 }
249
250 /**********************************************************************
251  * Bind the PerfMgr to the vendor layer for MAD sends/receives
252  **********************************************************************/
253 ib_api_status_t
254 osm_perfmgr_bind(osm_perfmgr_t * const pm, const ib_net64_t port_guid)
255 {
256         osm_bind_info_t bind_info;
257         ib_api_status_t status = IB_SUCCESS;
258
259         OSM_LOG_ENTER(pm->log);
260
261         if (pm->bind_handle != OSM_BIND_INVALID_HANDLE) {
262                 OSM_LOG(pm->log, OSM_LOG_ERROR,
263                         "ERR 4C03: Multiple binds not allowed\n");
264                 status = IB_ERROR;
265                 goto Exit;
266         }
267
268         bind_info.port_guid = port_guid;
269         bind_info.mad_class = IB_MCLASS_PERF;
270         bind_info.class_version = 1;
271         bind_info.is_responder = FALSE;
272         bind_info.is_report_processor = FALSE;
273         bind_info.is_trap_processor = FALSE;
274         bind_info.recv_q_size = OSM_PM_DEFAULT_QP1_RCV_SIZE;
275         bind_info.send_q_size = OSM_PM_DEFAULT_QP1_SEND_SIZE;
276
277         OSM_LOG(pm->log, OSM_LOG_VERBOSE,
278                 "Binding to port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid));
279
280         pm->bind_handle = osm_vendor_bind(pm->vendor,
281                                           &bind_info,
282                                           pm->mad_pool,
283                                           osm_perfmgr_mad_recv_callback,
284                                           osm_perfmgr_mad_send_err_callback,
285                                           pm);
286
287         if (pm->bind_handle == OSM_BIND_INVALID_HANDLE) {
288                 status = IB_ERROR;
289                 OSM_LOG(pm->log, OSM_LOG_ERROR,
290                         "ERR 4C04: Vendor specific bind failed (%s)\n",
291                         ib_get_err_str(status));
292                 goto Exit;
293         }
294
295 Exit:
296         OSM_LOG_EXIT(pm->log);
297         return (status);
298 }
299
300 /**********************************************************************
301  * Unbind the PerfMgr from the vendor layer for MAD sends/receives
302  **********************************************************************/
303 static void osm_perfmgr_mad_unbind(osm_perfmgr_t * const pm)
304 {
305         OSM_LOG_ENTER(pm->log);
306         if (pm->bind_handle == OSM_BIND_INVALID_HANDLE) {
307                 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C05: No previous bind\n");
308                 goto Exit;
309         }
310         osm_vendor_unbind(pm->bind_handle);
311 Exit:
312         OSM_LOG_EXIT(pm->log);
313 }
314
315 /**********************************************************************
316  * Given a monitored node and a port, return the qp
317  **********************************************************************/
318 static ib_net32_t get_qp(__monitored_node_t * mon_node, uint8_t port)
319 {
320         ib_net32_t qp = cl_ntoh32(1);
321
322         if (mon_node && mon_node->redir_tbl_size &&
323             port < mon_node->redir_tbl_size &&
324             mon_node->redir_port[port].redir_lid &&
325             mon_node->redir_port[port].redir_qp)
326                 qp = mon_node->redir_port[port].redir_qp;
327
328         return qp;
329 }
330
331 /**********************************************************************
332  * Given a node, a port, and an optional monitored node,
333  * return the appropriate lid to query that port
334  **********************************************************************/
335 static ib_net16_t
336 get_lid(osm_node_t * p_node, uint8_t port, __monitored_node_t * mon_node)
337 {
338         if (mon_node && mon_node->redir_tbl_size &&
339             port < mon_node->redir_tbl_size &&
340             mon_node->redir_port[port].redir_lid)
341                 return mon_node->redir_port[port].redir_lid;
342
343         switch (p_node->node_info.node_type) {
344         case IB_NODE_TYPE_CA:
345         case IB_NODE_TYPE_ROUTER:
346                 return osm_node_get_base_lid(p_node, port);
347         case IB_NODE_TYPE_SWITCH:
348                 return osm_node_get_base_lid(p_node, 0);
349         default:
350                 return 0;
351         }
352 }
353
354 /**********************************************************************
355  * Form and send the Port Counters MAD for a single port.
356  **********************************************************************/
357 static ib_api_status_t
358 osm_perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, ib_net16_t dest_lid,
359                         ib_net32_t dest_qp, uint8_t port, uint8_t mad_method,
360                         osm_madw_context_t * const p_context)
361 {
362         ib_api_status_t status = IB_SUCCESS;
363         ib_port_counters_t *port_counter = NULL;
364         ib_perfmgt_mad_t *pm_mad = NULL;
365         osm_madw_t *p_madw = NULL;
366
367         OSM_LOG_ENTER(perfmgr->log);
368
369         p_madw =
370             osm_mad_pool_get(perfmgr->mad_pool, perfmgr->bind_handle,
371                              MAD_BLOCK_SIZE, NULL);
372         if (p_madw == NULL)
373                 return (IB_INSUFFICIENT_MEMORY);
374
375         pm_mad = osm_madw_get_perfmgt_mad_ptr(p_madw);
376
377         /* build the mad */
378         pm_mad->header.base_ver = 1;
379         pm_mad->header.mgmt_class = IB_MCLASS_PERF;
380         pm_mad->header.class_ver = 1;
381         pm_mad->header.method = mad_method;
382         pm_mad->header.status = 0;
383         pm_mad->header.class_spec = 0;
384         pm_mad->header.trans_id =
385             cl_hton64((uint64_t) cl_atomic_inc(&(perfmgr->trans_id)));
386         pm_mad->header.attr_id = IB_MAD_ATTR_PORT_CNTRS;
387         pm_mad->header.resv = 0;
388         pm_mad->header.attr_mod = 0;
389
390         port_counter = (ib_port_counters_t *) & (pm_mad->data);
391         memset(port_counter, 0, sizeof(*port_counter));
392         port_counter->port_select = port;
393         port_counter->counter_select = 0xFFFF;
394
395         p_madw->mad_addr.dest_lid = dest_lid;
396         p_madw->mad_addr.addr_type.gsi.remote_qp = dest_qp;
397         p_madw->mad_addr.addr_type.gsi.remote_qkey =
398             cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY);
399         /* FIXME what about other partitions */
400         p_madw->mad_addr.addr_type.gsi.pkey_ix = 0;
401         p_madw->mad_addr.addr_type.gsi.service_level = 0;
402         p_madw->mad_addr.addr_type.gsi.global_route = FALSE;
403         p_madw->resp_expected = TRUE;
404
405         if (p_context)
406                 p_madw->context = *p_context;
407
408         status = osm_vendor_send(perfmgr->bind_handle, p_madw, TRUE);
409
410         if (status == IB_SUCCESS) {
411                 /* pause this thread if we have too many outstanding requests */
412                 cl_atomic_inc(&(perfmgr->outstanding_queries));
413                 if (perfmgr->outstanding_queries >
414                     perfmgr->max_outstanding_queries) {
415                         perfmgr->sweep_state = PERFMGR_SWEEP_SUSPENDED;
416                         cl_event_wait_on(&perfmgr->sig_query, EVENT_NO_TIMEOUT,
417                                          TRUE);
418                         perfmgr->sweep_state = PERFMGR_SWEEP_ACTIVE;
419                 }
420         }
421
422         OSM_LOG_EXIT(perfmgr->log);
423         return (status);
424 }
425
426 /**********************************************************************
427  * sweep the node_guid_tbl and collect the node guids to be tracked
428  **********************************************************************/
429 static void __collect_guids(cl_map_item_t * const p_map_item, void *context)
430 {
431         osm_node_t *node = (osm_node_t *) p_map_item;
432         uint64_t node_guid = cl_ntoh64(node->node_info.node_guid);
433         osm_perfmgr_t *pm = (osm_perfmgr_t *) context;
434         __monitored_node_t *mon_node = NULL;
435         uint32_t size;
436
437         OSM_LOG_ENTER(pm->log);
438
439         if (cl_qmap_get(&(pm->monitored_map), node_guid)
440             == cl_qmap_end(&(pm->monitored_map))) {
441                 /* if not already in our map add it */
442                 size = node->node_info.num_ports;
443                 mon_node = malloc(sizeof(*mon_node) + sizeof(redir_t) * size);
444                 if (!mon_node) {
445                         OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 4C06: "
446                                 "malloc failed: not handling node %s"
447                                 "(GUID 0x%" PRIx64 ")\n", node->print_desc, node_guid);
448                         goto Exit;
449                 }
450                 memset(mon_node, 0, sizeof(*mon_node) + sizeof(redir_t) * size);
451                 mon_node->guid = node_guid;
452                 mon_node->name = strdup(node->print_desc);
453                 mon_node->redir_tbl_size = size + 1;
454                 cl_qmap_insert(&(pm->monitored_map), node_guid,
455                                (cl_map_item_t *) mon_node);
456         }
457
458 Exit:
459         OSM_LOG_EXIT(pm->log);
460 }
461
462 /**********************************************************************
463  * query the Port Counters of all the nodes in the subnet.
464  **********************************************************************/
465 static void
466 __osm_perfmgr_query_counters(cl_map_item_t * const p_map_item, void *context)
467 {
468         ib_api_status_t status = IB_SUCCESS;
469         uint8_t port = 0, startport = 1;
470         osm_perfmgr_t *pm = (osm_perfmgr_t *) context;
471         osm_node_t *node = NULL;
472         __monitored_node_t *mon_node = (__monitored_node_t *) p_map_item;
473         osm_madw_context_t mad_context;
474         uint8_t num_ports = 0;
475         uint64_t node_guid = 0;
476         ib_net32_t remote_qp;
477
478         OSM_LOG_ENTER(pm->log);
479
480         cl_plock_acquire(pm->lock);
481         node = osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid));
482         if (!node) {
483                 OSM_LOG(pm->log, OSM_LOG_ERROR,
484                         "ERR 4C07: Node \"%s\" (guid 0x%" PRIx64
485                         ") no longer exists so removing from PerfMgr monitoring\n",
486                         mon_node->name, mon_node->guid);
487                 __mark_for_removal(pm, mon_node);
488                 goto Exit;
489         }
490
491         num_ports = osm_node_get_num_physp(node);
492         node_guid = cl_ntoh64(node->node_info.node_guid);
493
494         /* make sure we have a database object ready to store this information */
495         if (perfmgr_db_create_entry(pm->db, node_guid, num_ports,
496                                     node->print_desc) !=
497             PERFMGR_EVENT_DB_SUCCESS) {
498                 OSM_LOG(pm->log, OSM_LOG_ERROR,
499                         "ERR 4C08: DB create entry failed for 0x%"
500                         PRIx64 " (%s) : %s\n", node_guid, node->print_desc,
501                         strerror(errno));
502                 goto Exit;
503         }
504
505         /* if switch, check for enhanced port 0 */
506         if (osm_node_get_type(node) == IB_NODE_TYPE_SWITCH &&
507             node->sw &&
508             ib_switch_info_is_enhanced_port0(&node->sw->switch_info))
509                 startport = 0;
510
511         /* issue the query for each port */
512         for (port = startport; port < num_ports; port++) {
513                 ib_net16_t lid;
514
515                 if (!osm_node_get_physp_ptr(node, port))
516                         continue;
517
518                 lid = get_lid(node, port, mon_node);
519                 if (lid == 0) {
520                         OSM_LOG(pm->log, OSM_LOG_DEBUG, "WARN: node 0x%" PRIx64
521                                 " port %d (%s): port out of range, skipping\n",
522                                 cl_ntoh64(node->node_info.node_guid), port,
523                                 node->print_desc);
524                         continue;
525                 }
526
527                 remote_qp = get_qp(mon_node, port);
528
529                 mad_context.perfmgr_context.node_guid = node_guid;
530                 mad_context.perfmgr_context.port = port;
531                 mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_GET;
532 #if ENABLE_OSM_PERF_MGR_PROFILE
533                 gettimeofday(&(mad_context.perfmgr_context.query_start), NULL);
534 #endif
535                 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%"
536                         PRIx64 " port %d (lid %u) (%s)\n", node_guid, port,
537                         cl_ntoh16(lid), node->print_desc);
538                 status =
539                     osm_perfmgr_send_pc_mad(pm, lid, remote_qp, port,
540                                             IB_MAD_METHOD_GET, &mad_context);
541                 if (status != IB_SUCCESS)
542                         OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C09: "
543                                 "Failed to issue port counter query for node 0x%"
544                                 PRIx64 " port %d (%s)\n",
545                                 node->node_info.node_guid, port,
546                                 node->print_desc);
547         }
548 Exit:
549         cl_plock_release(pm->lock);
550         OSM_LOG_EXIT(pm->log);
551 }
552
553 /**********************************************************************
554  * Discovery stuff.
555  * Basically this code should not be here, but merged with main OpenSM
556  **********************************************************************/
557 extern void osm_drop_mgr_process(IN osm_sm_t *sm);
558
559 static int sweep_hop_1(osm_sm_t * sm)
560 {
561         ib_api_status_t status = IB_SUCCESS;
562         osm_bind_handle_t h_bind;
563         osm_madw_context_t context;
564         osm_node_t *p_node;
565         osm_port_t *p_port;
566         osm_physp_t *p_physp;
567         osm_dr_path_t *p_dr_path;
568         osm_dr_path_t hop_1_path;
569         ib_net64_t port_guid;
570         uint8_t port_num;
571         uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX];
572         uint8_t num_ports;
573         osm_physp_t *p_ext_physp;
574
575         port_guid = sm->p_subn->sm_port_guid;
576
577         p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
578         if (!p_port) {
579                 OSM_LOG(sm->p_log, OSM_LOG_ERROR,
580                         "ERR 4C81: No SM port object\n");
581                 return -1;
582         }
583
584         p_node = p_port->p_node;
585         port_num = ib_node_info_get_local_port_num(&p_node->node_info);
586
587         OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
588                 "Probing hop 1 on local port %u\n", port_num);
589
590         p_physp = osm_node_get_physp_ptr(p_node, port_num);
591
592         CL_ASSERT(p_physp);
593
594         p_dr_path = osm_physp_get_dr_path_ptr(p_physp);
595         h_bind = osm_dr_path_get_bind_handle(p_dr_path);
596
597         CL_ASSERT(h_bind != OSM_BIND_INVALID_HANDLE);
598
599         memset(path_array, 0, sizeof(path_array));
600         /* the hop_1 operations depend on the type of our node.
601          * Currently - legal nodes that can host SM are SW and CA */
602         switch (osm_node_get_type(p_node)) {
603         case IB_NODE_TYPE_CA:
604         case IB_NODE_TYPE_ROUTER:
605                 memset(&context, 0, sizeof(context));
606                 context.ni_context.node_guid = osm_node_get_node_guid(p_node);
607                 context.ni_context.port_num = port_num;
608
609                 path_array[1] = port_num;
610
611                 osm_dr_path_init(&hop_1_path, h_bind, 1, path_array);
612                 status = osm_req_get(sm, &hop_1_path,
613                                      IB_MAD_ATTR_NODE_INFO, 0,
614                                      CL_DISP_MSGID_NONE, &context);
615
616                 if (status != IB_SUCCESS)
617                         OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 4C82: "
618                                 "Request for NodeInfo failed\n");
619                 break;
620
621         case IB_NODE_TYPE_SWITCH:
622                 /* Need to go over all the ports of the switch, and send a node_info
623                  * from them. This doesn't include the port 0 of the switch, which
624                  * hosts the SM.
625                  * Note: We'll send another switchInfo on port 0, since if no ports
626                  * are connected, we still want to get some response, and have the
627                  * subnet come up.
628                  */
629                 num_ports = osm_node_get_num_physp(p_node);
630                 for (port_num = 0; port_num < num_ports; port_num++) {
631                         /* go through the port only if the port is not DOWN */
632                         p_ext_physp = osm_node_get_physp_ptr(p_node, port_num);
633                         if (!p_ext_physp || ib_port_info_get_port_state
634                             (&p_ext_physp->port_info) <= IB_LINK_DOWN)
635                                 continue;
636
637                         memset(&context, 0, sizeof(context));
638                         context.ni_context.node_guid =
639                             osm_node_get_node_guid(p_node);
640                         context.ni_context.port_num = port_num;
641
642                         path_array[1] = port_num;
643
644                         osm_dr_path_init(&hop_1_path, h_bind, 1, path_array);
645                         status = osm_req_get(sm, &hop_1_path,
646                                              IB_MAD_ATTR_NODE_INFO, 0,
647                                              CL_DISP_MSGID_NONE, &context);
648
649                         if (status != IB_SUCCESS)
650                                 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 4C82: "
651                                         "Request for NodeInfo failed\n");
652                 }
653                 break;
654
655         default:
656                 OSM_LOG(sm->p_log, OSM_LOG_ERROR,
657                         "ERR 4C83: Unknown node type %d\n",
658                         osm_node_get_type(p_node));
659         }
660
661         return (status);
662 }
663
664 static unsigned is_sm_port_down(osm_sm_t * const sm)
665 {
666         ib_net64_t port_guid;
667         osm_port_t *p_port;
668
669         port_guid = sm->p_subn->sm_port_guid;
670         if (port_guid == 0)
671                 return 1;
672
673         CL_PLOCK_ACQUIRE(sm->p_lock);
674         p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
675         if (!p_port) {
676                 CL_PLOCK_RELEASE(sm->p_lock);
677                 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 4C85: "
678                         "SM port with GUID:%016" PRIx64 " is unknown\n",
679                         cl_ntoh64(port_guid));
680                 return 1;
681         }
682         CL_PLOCK_RELEASE(sm->p_lock);
683
684         return osm_physp_get_port_state(p_port->p_physp) == IB_LINK_DOWN;
685 }
686
687 static int sweep_hop_0(osm_sm_t * const sm)
688 {
689         ib_api_status_t status;
690         osm_dr_path_t dr_path;
691         osm_bind_handle_t h_bind;
692         uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX];
693
694         memset(path_array, 0, sizeof(path_array));
695
696         h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl);
697         if (h_bind == OSM_BIND_INVALID_HANDLE) {
698                 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "No bound ports.\n");
699                 return -1;
700         }
701
702         osm_dr_path_init(&dr_path, h_bind, 0, path_array);
703         status = osm_req_get(sm, &dr_path, IB_MAD_ATTR_NODE_INFO, 0,
704                              CL_DISP_MSGID_NONE, NULL);
705
706         if (status != IB_SUCCESS)
707                 OSM_LOG(sm->p_log, OSM_LOG_ERROR,
708                         "ERR 4C86: Request for NodeInfo failed\n");
709
710         return (status);
711 }
712
713 static void reset_node_count(cl_map_item_t * const p_map_item, void *cxt)
714 {
715         osm_node_t *p_node = (osm_node_t *) p_map_item;
716         p_node->discovery_count = 0;
717 }
718
719 static void reset_port_count(cl_map_item_t * const p_map_item, void *cxt)
720 {
721         osm_port_t *p_port = (osm_port_t *) p_map_item;
722         p_port->discovery_count = 0;
723 }
724
725 static void reset_switch_count(cl_map_item_t * const p_map_item, void *cxt)
726 {
727         osm_switch_t *p_sw = (osm_switch_t *) p_map_item;
728         p_sw->discovery_count = 0;
729         p_sw->need_update = 0;
730 }
731
732 static int perfmgr_discovery(osm_opensm_t * osm)
733 {
734         int ret;
735
736         CL_PLOCK_ACQUIRE(&osm->lock);
737         cl_qmap_apply_func(&osm->subn.node_guid_tbl, reset_node_count, NULL);
738         cl_qmap_apply_func(&osm->subn.port_guid_tbl, reset_port_count, NULL);
739         cl_qmap_apply_func(&osm->subn.sw_guid_tbl, reset_switch_count, NULL);
740         CL_PLOCK_RELEASE(&osm->lock);
741
742         osm->subn.in_sweep_hop_0 = TRUE;
743
744         ret = sweep_hop_0(&osm->sm);
745         if (ret)
746                 goto _exit;
747
748         if (wait_for_pending_transactions(&osm->stats))
749                 goto _exit;
750
751         if (is_sm_port_down(&osm->sm)) {
752                 OSM_LOG(&osm->log, OSM_LOG_VERBOSE, "SM port is down\n");
753                 goto _drop;
754         }
755
756         osm->subn.in_sweep_hop_0 = FALSE;
757
758         ret = sweep_hop_1(&osm->sm);
759         if (ret)
760                 goto _exit;
761
762         if (wait_for_pending_transactions(&osm->stats))
763                 goto _exit;
764
765 _drop:
766         osm_drop_mgr_process(&osm->sm);
767
768 _exit:
769         return ret;
770 }
771
772 /**********************************************************************
773  * Main PerfMgr processor - query the performance counters.
774  **********************************************************************/
775 void osm_perfmgr_process(osm_perfmgr_t * pm)
776 {
777 #if ENABLE_OSM_PERF_MGR_PROFILE
778         struct timeval before, after;
779 #endif
780
781         if (pm->state != PERFMGR_STATE_ENABLED)
782                 return;
783
784         if (pm->subn->sm_state == IB_SMINFO_STATE_STANDBY ||
785             pm->subn->sm_state == IB_SMINFO_STATE_NOTACTIVE)
786                 perfmgr_discovery(pm->subn->p_osm);
787
788 #if ENABLE_OSM_PERF_MGR_PROFILE
789         gettimeofday(&before, NULL);
790 #endif
791         pm->sweep_state = PERFMGR_SWEEP_ACTIVE;
792         /* With the global lock held collect the node guids */
793         /* FIXME we should be able to track SA notices
794          * and not have to sweep the node_guid_tbl each pass
795          */
796         OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Gathering PerfMgr stats\n");
797         cl_plock_acquire(pm->lock);
798         cl_qmap_apply_func(&(pm->subn->node_guid_tbl),
799                            __collect_guids, (void *)pm);
800         cl_plock_release(pm->lock);
801
802         /* then for each node query their counters */
803         cl_qmap_apply_func(&(pm->monitored_map),
804                            __osm_perfmgr_query_counters, (void *)pm);
805
806         /* Clean out any nodes found to be removed during the
807          * sweep
808          */
809         __remove_marked_nodes(pm);
810
811 #if ENABLE_OSM_PERF_MGR_PROFILE
812         /* spin on outstanding queries */
813         while (pm->outstanding_queries > 0)
814                 cl_event_wait_on(&pm->sig_sweep, 1000, TRUE);
815
816         gettimeofday(&after, NULL);
817         diff_time(&before, &after, &after);
818         osm_log(pm->log, OSM_LOG_INFO,
819                 "PerfMgr total sweep time : %ld.%06ld s\n"
820                 "        fastest mad      : %g us\n"
821                 "        slowest mad      : %g us\n"
822                 "        average mad      : %g us\n",
823                 after.tv_sec, after.tv_usec,
824                 perfmgr_mad_stats.fastest_us,
825                 perfmgr_mad_stats.slowest_us, perfmgr_mad_stats.avg_us);
826         perfmgr_clear_mad_stats();
827 #endif
828
829         pm->sweep_state = PERFMGR_SWEEP_SLEEP;
830 }
831
832 /**********************************************************************
833  * PerfMgr timer - loop continuously and signal SM to run PerfMgr
834  * processor.
835  **********************************************************************/
836 static void perfmgr_sweep(void *arg)
837 {
838         osm_perfmgr_t *pm = arg;
839
840         if (pm->state == PERFMGR_STATE_ENABLED)
841                 osm_sm_signal(pm->sm, OSM_SIGNAL_PERFMGR_SWEEP);
842         cl_timer_start(&pm->sweep_timer, pm->sweep_time_s * 1000);
843 }
844
845 /**********************************************************************
846  **********************************************************************/
847 void osm_perfmgr_shutdown(osm_perfmgr_t * const pm)
848 {
849         OSM_LOG_ENTER(pm->log);
850         cl_timer_stop(&pm->sweep_timer);
851         osm_perfmgr_mad_unbind(pm);
852         OSM_LOG_EXIT(pm->log);
853 }
854
855 /**********************************************************************
856  **********************************************************************/
857 void osm_perfmgr_destroy(osm_perfmgr_t * const pm)
858 {
859         OSM_LOG_ENTER(pm->log);
860         perfmgr_db_destroy(pm->db);
861         cl_timer_destroy(&pm->sweep_timer);
862         OSM_LOG_EXIT(pm->log);
863 }
864
865 /**********************************************************************
866  * Detect if someone else on the network could have cleared the counters
867  * without us knowing.  This is easy to detect because the counters never wrap
868  * but are "sticky"
869  *
870  * The one time this will not work is if the port is getting errors fast enough
871  * to have the reading overtake the previous reading.  In this case counters
872  * will be missed.
873  **********************************************************************/
874 static void
875 osm_perfmgr_check_oob_clear(osm_perfmgr_t * pm, __monitored_node_t *mon_node,
876                             uint8_t port, perfmgr_db_err_reading_t * cr,
877                             perfmgr_db_data_cnt_reading_t * dc)
878 {
879         perfmgr_db_err_reading_t prev_err;
880         perfmgr_db_data_cnt_reading_t prev_dc;
881
882         if (perfmgr_db_get_prev_err(pm->db, mon_node->guid, port, &prev_err)
883             != PERFMGR_EVENT_DB_SUCCESS) {
884                 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Failed to find previous "
885                         "error reading for %s (guid 0x%" PRIx64 ") port %u\n",
886                         mon_node->name, mon_node->guid, port);
887                 return;
888         }
889
890         if (cr->symbol_err_cnt < prev_err.symbol_err_cnt ||
891             cr->link_err_recover < prev_err.link_err_recover ||
892             cr->link_downed < prev_err.link_downed ||
893             cr->rcv_err < prev_err.rcv_err ||
894             cr->rcv_rem_phys_err < prev_err.rcv_rem_phys_err ||
895             cr->rcv_switch_relay_err < prev_err.rcv_switch_relay_err ||
896             cr->xmit_discards < prev_err.xmit_discards ||
897             cr->xmit_constraint_err < prev_err.xmit_constraint_err ||
898             cr->rcv_constraint_err < prev_err.rcv_constraint_err ||
899             cr->link_integrity < prev_err.link_integrity ||
900             cr->buffer_overrun < prev_err.buffer_overrun ||
901             cr->vl15_dropped < prev_err.vl15_dropped) {
902                 OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 4C0A: "
903                         "Detected an out of band error clear "
904                         "on %s (0x%" PRIx64 ") port %u\n",
905                         mon_node->name, mon_node->guid, port);
906                 perfmgr_db_clear_prev_err(pm->db, mon_node->guid, port);
907         }
908
909         /* FIXME handle extended counters */
910         if (perfmgr_db_get_prev_dc(pm->db, mon_node->guid, port, &prev_dc)
911             != PERFMGR_EVENT_DB_SUCCESS) {
912                 OSM_LOG(pm->log, OSM_LOG_VERBOSE,
913                         "Failed to find previous data count "
914                         "reading for %s (0x%" PRIx64 ") port %u\n",
915                         mon_node->name, mon_node->guid, port);
916                 return;
917         }
918
919         if (dc->xmit_data < prev_dc.xmit_data ||
920             dc->rcv_data < prev_dc.rcv_data ||
921             dc->xmit_pkts < prev_dc.xmit_pkts ||
922             dc->rcv_pkts < prev_dc.rcv_pkts) {
923                 OSM_LOG(pm->log, OSM_LOG_ERROR,
924                         "PerfMgr: ERR 4C0B: Detected an out of band data counter "
925                         "clear on node %s (0x%" PRIx64 ") port %u\n",
926                         mon_node->name, mon_node->guid, port);
927                 perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port);
928         }
929 }
930
931 /**********************************************************************
932  * Return 1 if the value is "close" to overflowing
933  **********************************************************************/
static int counter_overflow_4(uint8_t val)
{
        /* used for the two 4-bit fields of link_int_buffer_overrun;
         * a value of 10 or more is treated as close to the limit
         */
        return val < 10 ? 0 : 1;
}
938
static int counter_overflow_8(uint8_t val)
{
        /* "close" means within the top quarter of the 8-bit range */
        const int threshold = UINT8_MAX - (UINT8_MAX / 4);

        return val >= threshold;
}
943
944 static int counter_overflow_16(ib_net16_t val)
945 {
946         return (cl_ntoh16(val) >= (UINT16_MAX - (UINT16_MAX / 4)));
947 }
948
949 static int counter_overflow_32(ib_net32_t val)
950 {
951         return (cl_ntoh32(val) >= (UINT32_MAX - (UINT32_MAX / 4)));
952 }
953
954 /**********************************************************************
955  * Check if the port counters have overflowed and if so issue a clear
956  * MAD to the port.
957  **********************************************************************/
958 static void
959 osm_perfmgr_check_overflow(osm_perfmgr_t * pm, __monitored_node_t *mon_node,
960                            uint8_t port, ib_port_counters_t * pc)
961 {
962         osm_madw_context_t mad_context;
963         ib_api_status_t status;
964         ib_net32_t remote_qp;
965
966         OSM_LOG_ENTER(pm->log);
967
968         if (counter_overflow_16(pc->symbol_err_cnt) ||
969             counter_overflow_8(pc->link_err_recover) ||
970             counter_overflow_8(pc->link_downed) ||
971             counter_overflow_16(pc->rcv_err) ||
972             counter_overflow_16(pc->rcv_rem_phys_err) ||
973             counter_overflow_16(pc->rcv_switch_relay_err) ||
974             counter_overflow_16(pc->xmit_discards) ||
975             counter_overflow_8(pc->xmit_constraint_err) ||
976             counter_overflow_8(pc->rcv_constraint_err) ||
977             counter_overflow_4(PC_LINK_INT(pc->link_int_buffer_overrun)) ||
978             counter_overflow_4(PC_BUF_OVERRUN(pc->link_int_buffer_overrun)) ||
979             counter_overflow_16(pc->vl15_dropped) ||
980             counter_overflow_32(pc->xmit_data) ||
981             counter_overflow_32(pc->rcv_data) ||
982             counter_overflow_32(pc->xmit_pkts) ||
983             counter_overflow_32(pc->rcv_pkts)) {
984                 osm_node_t *p_node = NULL;
985                 ib_net16_t lid = 0;
986
987                 osm_log(pm->log, OSM_LOG_VERBOSE,
988                         "PerfMgr: Counter overflow: %s (0x%" PRIx64
989                         ") port %d; clearing counters\n",
990                         mon_node->name, mon_node->guid, port);
991
992                 cl_plock_acquire(pm->lock);
993                 p_node = osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid));
994                 lid = get_lid(p_node, port, mon_node);
995                 cl_plock_release(pm->lock);
996                 if (lid == 0) {
997                         OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 4C0C: "
998                                 "Failed to clear counters for %s (0x%"
999                                 PRIx64 ") port %d; failed to get lid\n",
1000                                 mon_node->name, mon_node->guid, port);
1001                         goto Exit;
1002                 }
1003
1004                 remote_qp = get_qp(NULL, port);
1005
1006                 mad_context.perfmgr_context.node_guid = mon_node->guid;
1007                 mad_context.perfmgr_context.port = port;
1008                 mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_SET;
1009                 /* clear port counters */
1010                 status =
1011                     osm_perfmgr_send_pc_mad(pm, lid, remote_qp, port,
1012                                             IB_MAD_METHOD_SET, &mad_context);
1013                 if (status != IB_SUCCESS)
1014                         OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 4C11: "
1015                                 "Failed to send clear counters MAD for %s (0x%"
1016                                 PRIx64 ") port %d\n",
1017                                 mon_node->name, mon_node->guid, port);
1018
1019                 perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port);
1020         }
1021
1022 Exit:
1023         OSM_LOG_EXIT(pm->log);
1024 }
1025
1026 /**********************************************************************
1027  * Check values for logging of errors
1028  **********************************************************************/
1029 static void
1030 osm_perfmgr_log_events(osm_perfmgr_t * pm, __monitored_node_t *mon_node, uint8_t port,
1031                        perfmgr_db_err_reading_t * reading)
1032 {
1033         perfmgr_db_err_reading_t prev_read;
1034         time_t time_diff = 0;
1035         perfmgr_db_err_t err =
1036             perfmgr_db_get_prev_err(pm->db, mon_node->guid, port, &prev_read);
1037
1038         if (err != PERFMGR_EVENT_DB_SUCCESS) {
1039                 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Failed to find previous "
1040                         "reading for %s (0x%" PRIx64 ") port %u\n",
1041                         mon_node->name, mon_node->guid, port);
1042                 return;
1043         }
1044         time_diff = (reading->time - prev_read.time);
1045
1046         /* FIXME these events should be defineable by the user in a config
1047          * file somewhere. */
1048         if (reading->symbol_err_cnt > prev_read.symbol_err_cnt)
1049                 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C0D: "
1050                         "Found %" PRIu64 " Symbol errors in %lu sec on %s (0x%"
1051                         PRIx64 ") port %u\n",
1052                         (reading->symbol_err_cnt - prev_read.symbol_err_cnt),
1053                         time_diff, mon_node->name, mon_node->guid, port);
1054
1055         if (reading->rcv_err > prev_read.rcv_err)
1056                 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C0E: "
1057                         "Found %" PRIu64
1058                         " Receive errors in %lu sec on %s (0x%" PRIx64
1059                         ") port %u\n", (reading->rcv_err - prev_read.rcv_err),
1060                         time_diff, mon_node->name, mon_node->guid, port);
1061
1062         if (reading->xmit_discards > prev_read.xmit_discards)
1063                 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C0F: "
1064                         "Found %" PRIu64 " Xmit Discards in %lu sec on %s (0x%"
1065                         PRIx64 ") port %u\n",
1066                         (reading->xmit_discards - prev_read.xmit_discards),
1067                         time_diff, mon_node->name, mon_node->guid, port);
1068 }
1069
1070 /**********************************************************************
1071  * The dispatcher uses a thread pool which will call this function when
1072  * we have a thread available to process our mad received from the wire.
1073  **********************************************************************/
1074 static void osm_pc_rcv_process(void *context, void *data)
1075 {
1076         osm_perfmgr_t *const pm = (osm_perfmgr_t *) context;
1077         osm_madw_t *p_madw = (osm_madw_t *) data;
1078         osm_madw_context_t *mad_context = &(p_madw->context);
1079         ib_port_counters_t *wire_read =
1080             (ib_port_counters_t *) & (osm_madw_get_perfmgt_mad_ptr(p_madw)->
1081                                       data);
1082         ib_mad_t *p_mad = osm_madw_get_mad_ptr(p_madw);
1083         uint64_t node_guid = mad_context->perfmgr_context.node_guid;
1084         uint8_t port = mad_context->perfmgr_context.port;
1085         perfmgr_db_err_reading_t err_reading;
1086         perfmgr_db_data_cnt_reading_t data_reading;
1087         cl_map_item_t *p_node;
1088         __monitored_node_t *p_mon_node;
1089
1090         OSM_LOG_ENTER(pm->log);
1091
1092         /* go ahead and get the monitored node struct to have the printable
1093          * name if needed in messages
1094          */
1095         if ((p_node = cl_qmap_get(&(pm->monitored_map), node_guid)) ==
1096             cl_qmap_end(&(pm->monitored_map))) {
1097                 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C12: GUID 0x%016"
1098                         PRIx64 " not found in monitored map\n",
1099                         node_guid);
1100                 goto Exit;
1101         }
1102         p_mon_node = (__monitored_node_t *) p_node;
1103
1104         OSM_LOG(pm->log, OSM_LOG_VERBOSE,
1105                 "Processing received MAD status 0x%x context 0x%"
1106                 PRIx64 " port %u\n", p_mad->status, node_guid, port);
1107
1108         /* Response could also be redirection (IBM eHCA PMA does this) */
1109         if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
1110                 char gid_str[INET6_ADDRSTRLEN];
1111                 ib_class_port_info_t *cpi =
1112                     (ib_class_port_info_t *) &
1113                     (osm_madw_get_perfmgt_mad_ptr(p_madw)->data);
1114                 ib_api_status_t status;
1115
1116                 OSM_LOG(pm->log, OSM_LOG_VERBOSE,
1117                         "Redirection to LID %u GID %s QP 0x%x received\n",
1118                         cl_ntoh16(cpi->redir_lid),
1119                         inet_ntop(AF_INET6, cpi->redir_gid.raw, gid_str,
1120                                   sizeof gid_str),
1121                         cl_ntoh32(cpi->redir_qp));
1122
1123                 /* LID or GID redirection ? */
1124                 /* For GID redirection, need to get PathRecord from SA */
1125                 if (cpi->redir_lid == 0) {
1126                         OSM_LOG(pm->log, OSM_LOG_VERBOSE,
1127                                 "GID redirection not currently implemented!\n");
1128                         goto Exit;
1129                 }
1130
1131                 if (!pm->subn->opt.perfmgr_redir) {
1132                                 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C16: "
1133                                        "redirection requested but disabled\n");
1134                         goto Exit;
1135                 }
1136
1137                 /* LID redirection support (easier than GID redirection) */
1138                 cl_plock_acquire(pm->lock);
1139                 /* Now, validate port number */
1140                 if (port > p_mon_node->redir_tbl_size) {
1141                         cl_plock_release(pm->lock);
1142                         OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C13: "
1143                                 "Invalid port num %d for GUID 0x%016"
1144                                 PRIx64 " num ports %d\n", port, node_guid,
1145                                 p_mon_node->redir_tbl_size);
1146                         goto Exit;
1147                 }
1148                 p_mon_node->redir_port[port].redir_lid = cpi->redir_lid;
1149                 p_mon_node->redir_port[port].redir_qp = cpi->redir_qp;
1150                 cl_plock_release(pm->lock);
1151
1152                 /* Finally, reissue the query to the redirected location */
1153                 status =
1154                     osm_perfmgr_send_pc_mad(pm, cpi->redir_lid, cpi->redir_qp,
1155                                             port,
1156                                             mad_context->perfmgr_context.
1157                                             mad_method, mad_context);
1158                 if (status != IB_SUCCESS)
1159                         OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C14: "
1160                                 "Failed to send redirected MAD with method 0x%x for node 0x%"
1161                                 PRIx64 " port %d\n",
1162                                 mad_context->perfmgr_context.mad_method,
1163                                 node_guid, port);
1164                 goto Exit;
1165         }
1166
1167         CL_ASSERT(p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS);
1168
1169         perfmgr_db_fill_err_read(wire_read, &err_reading);
1170         /* FIXME separate query for extended counters if they are supported
1171          * on the port.
1172          */
1173         perfmgr_db_fill_data_cnt_read_pc(wire_read, &data_reading);
1174
1175         /* detect an out of band clear on the port */
1176         if (mad_context->perfmgr_context.mad_method != IB_MAD_METHOD_SET)
1177                 osm_perfmgr_check_oob_clear(pm, p_mon_node, port,
1178                                             &err_reading, &data_reading);
1179
1180         /* log any critical events from this reading */
1181         osm_perfmgr_log_events(pm, p_mon_node, port, &err_reading);
1182
1183         if (mad_context->perfmgr_context.mad_method == IB_MAD_METHOD_GET) {
1184                 perfmgr_db_add_err_reading(pm->db, node_guid, port,
1185                                            &err_reading);
1186                 perfmgr_db_add_dc_reading(pm->db, node_guid, port,
1187                                           &data_reading);
1188         } else {
1189                 perfmgr_db_clear_prev_err(pm->db, node_guid, port);
1190                 perfmgr_db_clear_prev_dc(pm->db, node_guid, port);
1191         }
1192
1193         osm_perfmgr_check_overflow(pm, p_mon_node, port, wire_read);
1194
1195 #if ENABLE_OSM_PERF_MGR_PROFILE
1196         do {
1197                 struct timeval proc_time;
1198                 gettimeofday(&proc_time, NULL);
1199                 diff_time(&(p_madw->context.perfmgr_context.query_start),
1200                           &proc_time, &proc_time);
1201                 update_mad_stats(&proc_time);
1202         } while (0);
1203 #endif
1204
1205 Exit:
1206         osm_mad_pool_put(pm->mad_pool, p_madw);
1207
1208         OSM_LOG_EXIT(pm->log);
1209 }
1210
1211 /**********************************************************************
1212  * Initialize the PerfMgr object
1213  **********************************************************************/
1214 ib_api_status_t
1215 osm_perfmgr_init(osm_perfmgr_t * const pm, osm_opensm_t *osm,
1216                  const osm_subn_opt_t * const p_opt)
1217 {
1218         ib_api_status_t status = IB_SUCCESS;
1219
1220         OSM_LOG_ENTER(&osm->log);
1221
1222         OSM_LOG(&osm->log, OSM_LOG_VERBOSE, "Initializing PerfMgr\n");
1223
1224         memset(pm, 0, sizeof(*pm));
1225
1226         cl_event_construct(&pm->sig_sweep);
1227         cl_event_init(&pm->sig_sweep, FALSE);
1228         pm->subn = &osm->subn;
1229         pm->sm = &osm->sm;
1230         pm->log = &osm->log;
1231         pm->mad_pool = &osm->mad_pool;
1232         pm->vendor = osm->p_vendor;
1233         pm->trans_id = OSM_PERFMGR_INITIAL_TID_VALUE;
1234         pm->lock = &osm->lock;
1235         pm->state =
1236             p_opt->perfmgr ? PERFMGR_STATE_ENABLED : PERFMGR_STATE_DISABLE;
1237         pm->sweep_time_s = p_opt->perfmgr_sweep_time_s;
1238         pm->max_outstanding_queries = p_opt->perfmgr_max_outstanding_queries;
1239         pm->osm = osm;
1240
1241         status = cl_timer_init(&pm->sweep_timer, perfmgr_sweep, pm);
1242         if (status != IB_SUCCESS)
1243                 goto Exit;
1244
1245         pm->db = perfmgr_db_construct(pm);
1246         if (!pm->db) {
1247                 pm->state = PERFMGR_STATE_NO_DB;
1248                 goto Exit;
1249         }
1250
1251         pm->pc_disp_h = cl_disp_register(&osm->disp, OSM_MSG_MAD_PORT_COUNTERS,
1252                                          osm_pc_rcv_process, pm);
1253         if (pm->pc_disp_h == CL_DISP_INVALID_HANDLE)
1254                 goto Exit;
1255
1256         __init_monitored_nodes(pm);
1257
1258         cl_timer_start(&pm->sweep_timer, pm->sweep_time_s * 1000);
1259
1260 Exit:
1261         OSM_LOG_EXIT(pm->log);
1262         return (status);
1263 }
1264
1265 /**********************************************************************
1266  * Clear the counters from the db
1267  **********************************************************************/
1268 void osm_perfmgr_clear_counters(osm_perfmgr_t * pm)
1269 {
1270         /**
1271          * FIXME todo issue clear on the fabric?
1272          */
1273         perfmgr_db_clear_counters(pm->db);
1274         osm_log(pm->log, OSM_LOG_INFO, "PerfMgr counters cleared\n");
1275 }
1276
1277 /*******************************************************************
1278  * Have the DB dump its information to the file specified
1279  *******************************************************************/
1280 void osm_perfmgr_dump_counters(osm_perfmgr_t * pm, perfmgr_db_dump_t dump_type)
1281 {
1282         char path[256];
1283         char *file_name;
1284         if (pm->subn->opt.event_db_dump_file)
1285                 file_name = pm->subn->opt.event_db_dump_file;
1286         else {
1287                 snprintf(path, sizeof(path), "%s/%s",
1288                          pm->subn->opt.dump_files_dir,
1289                          OSM_PERFMGR_DEFAULT_DUMP_FILE);
1290                 file_name = path;
1291         }
1292         if (perfmgr_db_dump(pm->db, file_name, dump_type) != 0)
1293                 OSM_LOG(pm->log, OSM_LOG_ERROR, "Failed to dump file %s : %s",
1294                         file_name, strerror(errno));
1295 }
1296
1297 /*******************************************************************
1298  * Have the DB print its information to the fp specified
1299  *******************************************************************/
1300 void
1301 osm_perfmgr_print_counters(osm_perfmgr_t *pm, char *nodename, FILE *fp)
1302 {
1303         uint64_t guid = strtoull(nodename, NULL, 0);
1304         if (guid == 0 && errno == EINVAL) {
1305                 perfmgr_db_print_by_name(pm->db, nodename, fp);
1306         } else {
1307                 perfmgr_db_print_by_guid(pm->db, guid, fp);
1308         }
1309 }
1310
1311 #endif                          /* ENABLE_OSM_PERF_MGR */