]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/ofed/opensm/opensm/osm_perfmgr.c
Import DTS files for riscv from Linux 5.4
[FreeBSD/FreeBSD.git] / contrib / ofed / opensm / opensm / osm_perfmgr.c
1 /*
2  * Copyright (c) 2007 The Regents of the University of California.
3  * Copyright (c) 2007-2009 Voltaire, Inc. All rights reserved.
4  * Copyright (c) 2009,2010 HNR Consulting. All rights reserved.
5  * Copyright (c) 2013 Lawrence Livermore National Security. All rights reserved.
6  * Copyright (c) 2011-2014 Mellanox Technologies LTD. All rights reserved.
7  *
8  * This software is available to you under a choice of one of two
9  * licenses.  You may choose to be licensed under the terms of the GNU
10  * General Public License (GPL) Version 2, available from the file
11  * COPYING in the main directory of this source tree, or the
12  * OpenIB.org BSD license below:
13  *
14  *     Redistribution and use in source and binary forms, with or
15  *     without modification, are permitted provided that the following
16  *     conditions are met:
17  *
18  *      - Redistributions of source code must retain the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer.
21  *
22  *      - Redistributions in binary form must reproduce the above
23  *        copyright notice, this list of conditions and the following
24  *        disclaimer in the documentation and/or other materials
25  *        provided with the distribution.
26  *
27  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34  * SOFTWARE.
35  *
36  */
37
38 /*
39  * Abstract:
40  *    Implementation of osm_perfmgr_t.
41  * This object implements an IBA performance manager.
42  *
43  * Author:
44  *    Ira Weiny, LLNL
45  */
46
47 #if HAVE_CONFIG_H
48 #  include <config.h>
49 #endif                          /* HAVE_CONFIG_H */
50
51 #ifdef ENABLE_OSM_PERF_MGR
52 #include <stdlib.h>
53 #include <stdint.h>
54 #include <string.h>
55 #include <poll.h>
56 #include <errno.h>
57 #include <sys/time.h>
58 #include <netinet/in.h>
59 #include <float.h>
60 #include <arpa/inet.h>
61 #include <sys/socket.h>
62 #include <iba/ib_types.h>
63 #include <complib/cl_debug.h>
64 #include <complib/cl_thread.h>
65 #include <opensm/osm_file_ids.h>
66 #define FILE_ID OSM_FILE_PERFMGR_C
67 #include <vendor/osm_vendor_api.h>
68 #include <opensm/osm_perfmgr.h>
69 #include <opensm/osm_log.h>
70 #include <opensm/osm_node.h>
71 #include <opensm/osm_opensm.h>
72 #include <opensm/osm_helper.h>
73
74 #define PERFMGR_INITIAL_TID_VALUE 0xcafe
75
76 #ifdef ENABLE_OSM_PERF_MGR_PROFILE
/*
 * Accumulated statistics over the timing samples handed to
 * update_mad_stats().  All times are in microseconds.
 *
 * The initial values are chosen so that the first sample replaces them:
 * fastest_us starts at the largest double and slowest_us at DBL_MIN
 * (the smallest positive normalized double).
 */
struct {
	double fastest_us;	/* smallest sample seen so far */
	double slowest_us;	/* largest sample seen so far */
	double avg_us;		/* running mean of all samples */
	uint64_t num;		/* number of samples folded into avg_us */
} perfmgr_mad_stats = {
	.fastest_us = DBL_MAX,
	.slowest_us = DBL_MIN,
	.avg_us = 0,
	.num = 0
};
84
85 /* diff must be something which can fit in a susecond_t */
86 static inline void update_mad_stats(struct timeval *diff)
87 {
88         double new = (diff->tv_sec * 1000000) + diff->tv_usec;
89         if (new < perfmgr_mad_stats.fastest_us)
90                 perfmgr_mad_stats.fastest_us = new;
91         if (new > perfmgr_mad_stats.slowest_us)
92                 perfmgr_mad_stats.slowest_us = new;
93
94         perfmgr_mad_stats.avg_us =
95             ((perfmgr_mad_stats.avg_us * perfmgr_mad_stats.num) + new)
96             / (perfmgr_mad_stats.num + 1);
97         perfmgr_mad_stats.num++;
98 }
99
100 static inline void clear_mad_stats(void)
101 {
102         perfmgr_mad_stats.fastest_us = DBL_MAX;
103         perfmgr_mad_stats.slowest_us = DBL_MIN;
104         perfmgr_mad_stats.avg_us = 0;
105         perfmgr_mad_stats.num = 0;
106 }
107
/*
 * Store (after - before) in diff, keeping tv_usec non-negative by
 * borrowing one second when needed.
 *
 * The result is computed in a local and written back in one step, so
 * after and diff can be the same struct.
 */
static inline void diff_time(struct timeval *before, struct timeval *after,
			     struct timeval *diff)
{
	struct timeval delta;

	delta.tv_sec = after->tv_sec - before->tv_sec;
	delta.tv_usec = after->tv_usec - before->tv_usec;

	/* borrow a second if the microsecond part went negative */
	if (delta.tv_usec < 0) {
		delta.tv_sec--;
		delta.tv_usec += 1000000;
	}

	*diff = delta;
}
120 #endif
121
122 /**********************************************************************
123  * Internal helper functions
124  **********************************************************************/
125 static void init_monitored_nodes(osm_perfmgr_t * pm)
126 {
127         cl_qmap_init(&pm->monitored_map);
128         pm->remove_list = NULL;
129         cl_event_construct(&pm->sig_query);
130         cl_event_init(&pm->sig_query, FALSE);
131 }
132
133 static void mark_for_removal(osm_perfmgr_t * pm, monitored_node_t * node)
134 {
135         if (pm->remove_list) {
136                 node->next = pm->remove_list;
137                 pm->remove_list = node;
138         } else {
139                 node->next = NULL;
140                 pm->remove_list = node;
141         }
142 }
143
144 static void remove_marked_nodes(osm_perfmgr_t * pm)
145 {
146         while (pm->remove_list) {
147                 monitored_node_t *next = pm->remove_list->next;
148                 int port;
149
150                 cl_qmap_remove_item(&pm->monitored_map,
151                                     (cl_map_item_t *) (pm->remove_list));
152
153                 if (pm->rm_nodes)
154                         perfmgr_db_delete_entry(pm->db, pm->remove_list->guid);
155                 else
156                         perfmgr_db_mark_active(pm->db, pm->remove_list->guid, FALSE);
157
158                 if (pm->remove_list->name)
159                         free(pm->remove_list->name);
160
161                 for (port = pm->remove_list->esp0 ? 0 : 1;
162                      port < pm->remove_list->num_ports;
163                      port++) {
164                         if (pm->remove_list->port[port].remote_name)
165                                 free(pm->remove_list->port[port].remote_name);
166                 }
167
168                 free(pm->remove_list);
169                 pm->remove_list = next;
170         }
171 }
172
173 static inline void decrement_outstanding_queries(osm_perfmgr_t * pm)
174 {
175         cl_atomic_dec(&pm->outstanding_queries);
176
177         if (!pm->outstanding_queries) {
178                 cl_spinlock_acquire(&pm->lock);
179                 if (pm->sweep_state == PERFMGR_SWEEP_POST_PROCESSING) {
180                         pm->sweep_state = PERFMGR_SWEEP_SLEEP;
181                         OSM_LOG(pm->log, OSM_LOG_INFO,
182                                 "PM sweep state exiting Post Processing\n");
183                 }
184                 cl_spinlock_release(&pm->lock);
185         }
186
187         cl_event_signal(&pm->sig_query);
188 }
189
190 /**********************************************************************
191  * Receive the MAD from the vendor layer and post it for processing by
192  * the dispatcher
193  **********************************************************************/
194 static void perfmgr_mad_recv_callback(osm_madw_t * p_madw, void *bind_context,
195                                       osm_madw_t * p_req_madw)
196 {
197         osm_perfmgr_t *pm = (osm_perfmgr_t *) bind_context;
198
199         OSM_LOG_ENTER(pm->log);
200
201         CL_ASSERT(p_madw);
202         CL_ASSERT(p_req_madw != NULL);
203
204         osm_madw_copy_context(p_madw, p_req_madw);
205         osm_mad_pool_put(pm->mad_pool, p_req_madw);
206
207         decrement_outstanding_queries(pm);
208
209         /* post this message for later processing. */
210         if (cl_disp_post(pm->pc_disp_h, OSM_MSG_MAD_PORT_COUNTERS,
211                          p_madw, NULL, NULL) != CL_SUCCESS) {
212                 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5401: "
213                         "PerfMgr Dispatcher post failed\n");
214                 osm_mad_pool_put(pm->mad_pool, p_madw);
215         }
216         OSM_LOG_EXIT(pm->log);
217 }
218
219 /**********************************************************************
220  * Process MAD send errors
221  **********************************************************************/
222 static void perfmgr_mad_send_err_callback(void *bind_context,
223                                           osm_madw_t * p_madw)
224 {
225         osm_perfmgr_t *pm = (osm_perfmgr_t *) bind_context;
226         osm_madw_context_t *context = &p_madw->context;
227         uint64_t node_guid = context->perfmgr_context.node_guid;
228         uint8_t port = context->perfmgr_context.port;
229         cl_map_item_t *p_node;
230         monitored_node_t *p_mon_node;
231         ib_net16_t orig_lid;
232
233         OSM_LOG_ENTER(pm->log);
234
235         /*
236          * get the monitored node struct to have the printable name
237          * for log messages
238          */
239         if ((p_node = cl_qmap_get(&pm->monitored_map, node_guid)) ==
240             cl_qmap_end(&pm->monitored_map)) {
241                 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5415: GUID 0x%016"
242                         PRIx64 " not found in monitored map\n", node_guid);
243                 goto Exit;
244         }
245         p_mon_node = (monitored_node_t *) p_node;
246
247         OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5402: %s (0x%" PRIx64
248                 ") port %u LID %u TID 0x%" PRIx64 "\n",
249                 p_mon_node->name, p_mon_node->guid, port,
250                 cl_ntoh16(p_madw->mad_addr.dest_lid),
251                 cl_ntoh64(p_madw->p_mad->trans_id));
252
253         if (pm->subn->opt.perfmgr_redir && p_madw->status == IB_TIMEOUT) {
254                 /* First, find the node in the monitored map */
255                 cl_plock_acquire(&pm->osm->lock);
256                 /* Now, validate port number */
257                 if (port >= p_mon_node->num_ports) {
258                         cl_plock_release(&pm->osm->lock);
259                         OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5416: "
260                                 "Invalid port num %u for %s (GUID 0x%016"
261                                 PRIx64 ") num ports %u\n", port,
262                                 p_mon_node->name, p_mon_node->guid,
263                                 p_mon_node->num_ports);
264                         goto Exit;
265                 }
266                 /* Clear redirection info for this port except orig_lid */
267                 orig_lid = p_mon_node->port[port].orig_lid;
268                 memset(&p_mon_node->port[port], 0, sizeof(monitored_port_t));
269                 p_mon_node->port[port].orig_lid = orig_lid;
270                 p_mon_node->port[port].valid = TRUE;
271                 cl_plock_release(&pm->osm->lock);
272         }
273
274 Exit:
275         osm_mad_pool_put(pm->mad_pool, p_madw);
276
277         decrement_outstanding_queries(pm);
278
279         OSM_LOG_EXIT(pm->log);
280 }
281
282 /**********************************************************************
283  * Bind the PerfMgr to the vendor layer for MAD sends/receives
284  **********************************************************************/
285 ib_api_status_t osm_perfmgr_bind(osm_perfmgr_t * pm, ib_net64_t port_guid)
286 {
287         osm_bind_info_t bind_info;
288         ib_api_status_t status = IB_SUCCESS;
289
290         OSM_LOG_ENTER(pm->log);
291
292         if (pm->bind_handle != OSM_BIND_INVALID_HANDLE) {
293                 OSM_LOG(pm->log, OSM_LOG_ERROR,
294                         "ERR 5403: Multiple binds not allowed\n");
295                 status = IB_ERROR;
296                 goto Exit;
297         }
298
299         bind_info.port_guid = pm->port_guid = port_guid;
300         bind_info.mad_class = IB_MCLASS_PERF;
301         bind_info.class_version = 1;
302         bind_info.is_responder = FALSE;
303         bind_info.is_report_processor = FALSE;
304         bind_info.is_trap_processor = FALSE;
305         bind_info.recv_q_size = OSM_PM_DEFAULT_QP1_RCV_SIZE;
306         bind_info.send_q_size = OSM_PM_DEFAULT_QP1_SEND_SIZE;
307         bind_info.timeout = pm->subn->opt.transaction_timeout;
308         bind_info.retries = pm->subn->opt.transaction_retries;
309
310         OSM_LOG(pm->log, OSM_LOG_VERBOSE,
311                 "Binding to port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid));
312
313         pm->bind_handle = osm_vendor_bind(pm->vendor, &bind_info, pm->mad_pool,
314                                           perfmgr_mad_recv_callback,
315                                           perfmgr_mad_send_err_callback, pm);
316
317         if (pm->bind_handle == OSM_BIND_INVALID_HANDLE) {
318                 status = IB_ERROR;
319                 OSM_LOG(pm->log, OSM_LOG_ERROR,
320                         "ERR 5404: Vendor specific bind failed (%s)\n",
321                         ib_get_err_str(status));
322         }
323
324 Exit:
325         OSM_LOG_EXIT(pm->log);
326         return status;
327 }
328
329 /**********************************************************************
330  * Unbind the PerfMgr from the vendor layer for MAD sends/receives
331  **********************************************************************/
332 static void perfmgr_mad_unbind(osm_perfmgr_t * pm)
333 {
334         OSM_LOG_ENTER(pm->log);
335         if (pm->bind_handle == OSM_BIND_INVALID_HANDLE) {
336                 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5405: No previous bind\n");
337                 goto Exit;
338         }
339         osm_vendor_unbind(pm->bind_handle);
340 Exit:
341         OSM_LOG_EXIT(pm->log);
342 }
343
344 /**********************************************************************
345  * Given a monitored node and a port, return the qp
346  **********************************************************************/
347 static ib_net32_t get_qp(monitored_node_t * mon_node, uint8_t port)
348 {
349         ib_net32_t qp = IB_QP1;
350
351         if (mon_node && mon_node->num_ports && port < mon_node->num_ports &&
352             mon_node->port[port].redirection && mon_node->port[port].qp)
353                 qp = mon_node->port[port].qp;
354
355         return qp;
356 }
357
358 static ib_net16_t get_base_lid(osm_node_t * p_node, uint8_t port)
359 {
360         switch (p_node->node_info.node_type) {
361         case IB_NODE_TYPE_CA:
362         case IB_NODE_TYPE_ROUTER:
363                 return osm_node_get_base_lid(p_node, port);
364         case IB_NODE_TYPE_SWITCH:
365                 return osm_node_get_base_lid(p_node, 0);
366         default:
367                 return 0;
368         }
369 }
370
371 /**********************************************************************
372  * Given a node, a port, and an optional monitored node,
373  * return the lid appropriate to query that port
374  **********************************************************************/
375 static ib_net16_t get_lid(osm_node_t * p_node, uint8_t port,
376                           monitored_node_t * mon_node)
377 {
378         if (mon_node && mon_node->num_ports && port < mon_node->num_ports &&
379             mon_node->port[port].lid)
380                 return mon_node->port[port].lid;
381
382         return get_base_lid(p_node, port);
383 }
384
385 /**********************************************************************
386  * Build a Performance Management class MAD
387  **********************************************************************/
388 static osm_madw_t *perfmgr_build_mad(osm_perfmgr_t * perfmgr,
389                                      ib_net16_t dest_lid,
390                                      uint8_t sl,
391                                      ib_net32_t dest_qp,
392                                      uint16_t pkey_ix,
393                                      uint8_t mad_method,
394                                      ib_net16_t attr_id,
395                                      osm_madw_context_t * p_context,
396                                      ib_perfmgt_mad_t ** p_pm_mad)
397 {
398         ib_perfmgt_mad_t *pm_mad = NULL;
399         osm_madw_t *p_madw = NULL;
400
401         OSM_LOG_ENTER(perfmgr->log);
402
403         p_madw = osm_mad_pool_get(perfmgr->mad_pool, perfmgr->bind_handle,
404                                   MAD_BLOCK_SIZE, NULL);
405         if (p_madw == NULL)
406                 return NULL;
407
408         pm_mad = osm_madw_get_perfmgt_mad_ptr(p_madw);
409
410         /* build the mad */
411         pm_mad->header.base_ver = 1;
412         pm_mad->header.mgmt_class = IB_MCLASS_PERF;
413         pm_mad->header.class_ver = 1;
414         pm_mad->header.method = mad_method;
415         pm_mad->header.status = 0;
416         pm_mad->header.class_spec = 0;
417         pm_mad->header.trans_id =
418             cl_hton64((uint64_t) cl_atomic_inc(&perfmgr->trans_id) &
419                       (uint64_t) (0xFFFFFFFF));
420         if (perfmgr->trans_id == 0)
421                 pm_mad->header.trans_id =
422                     cl_hton64((uint64_t) cl_atomic_inc(&perfmgr->trans_id) &
423                               (uint64_t) (0xFFFFFFFF));
424         pm_mad->header.attr_id = attr_id;
425         pm_mad->header.resv = 0;
426         pm_mad->header.attr_mod = 0;
427
428         p_madw->mad_addr.dest_lid = dest_lid;
429         p_madw->mad_addr.addr_type.gsi.remote_qp = dest_qp;
430         p_madw->mad_addr.addr_type.gsi.remote_qkey =
431             cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY);
432         p_madw->mad_addr.addr_type.gsi.pkey_ix = pkey_ix;
433         p_madw->mad_addr.addr_type.gsi.service_level = sl;
434         p_madw->mad_addr.addr_type.gsi.global_route = FALSE;
435         p_madw->resp_expected = TRUE;
436
437         if (p_context)
438                 p_madw->context = *p_context;
439
440         if (p_pm_mad)
441                 *p_pm_mad = pm_mad;
442
443         OSM_LOG_EXIT(perfmgr->log);
444
445         return (p_madw);
446 }
447
448 /**********************************************************************
449  * Send a Performance Management class MAD
450  **********************************************************************/
451 static ib_api_status_t perfmgr_send_mad(osm_perfmgr_t *perfmgr,
452                                         osm_madw_t * const p_madw)
453 {
454         cl_status_t sts;
455         ib_api_status_t status = osm_vendor_send(perfmgr->bind_handle, p_madw,
456                                                  TRUE);
457         if (status == IB_SUCCESS) {
458                 /* pause thread if there are too many outstanding requests */
459                 cl_atomic_inc(&(perfmgr->outstanding_queries));
460                 while (perfmgr->outstanding_queries >
461                        (int32_t)perfmgr->max_outstanding_queries) {
462                         cl_spinlock_acquire(&perfmgr->lock);
463                         if (perfmgr->sweep_state == PERFMGR_SWEEP_SLEEP) {
464                                 perfmgr->sweep_state = PERFMGR_SWEEP_POST_PROCESSING;
465                                 OSM_LOG(perfmgr->log, OSM_LOG_INFO,
466                                         "PM sweep state going into Post Processing\n");
467                         } else if (perfmgr->sweep_state == PERFMGR_SWEEP_ACTIVE)
468                                 perfmgr->sweep_state = PERFMGR_SWEEP_SUSPENDED;
469                         cl_spinlock_release(&perfmgr->lock);
470 wait:
471                         sts = cl_event_wait_on(&perfmgr->sig_query,
472                                                EVENT_NO_TIMEOUT, TRUE);
473                         if (sts != CL_SUCCESS)
474                                 goto wait;
475
476                         cl_spinlock_acquire(&perfmgr->lock);
477                         if (perfmgr->sweep_state == PERFMGR_SWEEP_SUSPENDED)
478                                 perfmgr->sweep_state = PERFMGR_SWEEP_ACTIVE;
479                         cl_spinlock_release(&perfmgr->lock);
480                 }
481         }
482         return (status);
483 }
484
485
486 /**********************************************************************
487  * Form and send the PortCounters MAD for a single port
488  **********************************************************************/
489 static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
490                                            ib_net16_t dest_lid,
491                                            ib_net32_t dest_qp, uint16_t pkey_ix,
492                                            uint8_t port, uint8_t mad_method,
493                                            uint16_t counter_select,
494                                            uint8_t counter_select2,
495                                            osm_madw_context_t * p_context,
496                                            uint8_t sl)
497 {
498         ib_api_status_t status = IB_SUCCESS;
499         ib_port_counters_t *port_counter = NULL;
500         ib_perfmgt_mad_t *pm_mad = NULL;
501         osm_madw_t *p_madw = NULL;
502
503         OSM_LOG_ENTER(perfmgr->log);
504
505         p_context->perfmgr_context.mad_attr_id = IB_MAD_ATTR_PORT_CNTRS;
506         p_madw = perfmgr_build_mad(perfmgr, dest_lid, sl, dest_qp, pkey_ix,
507                                 mad_method, IB_MAD_ATTR_PORT_CNTRS, p_context,
508                                 &pm_mad);
509         if (p_madw == NULL)
510                 return IB_INSUFFICIENT_MEMORY;
511
512         port_counter = (ib_port_counters_t *) & pm_mad->data;
513         memset(port_counter, 0, sizeof(*port_counter));
514         port_counter->port_select = port;
515         port_counter->counter_select = cl_hton16(counter_select);
516         port_counter->counter_select2 = counter_select2;
517
518         status = perfmgr_send_mad(perfmgr, p_madw);
519
520         OSM_LOG_EXIT(perfmgr->log);
521         return status;
522 }
523
524 /**********************************************************************
525  * sweep the node_guid_tbl and collect the node guids to be tracked
526  **********************************************************************/
527 static void collect_guids(cl_map_item_t * p_map_item, void *context)
528 {
529         osm_node_t *node = (osm_node_t *) p_map_item;
530         uint64_t node_guid = cl_ntoh64(node->node_info.node_guid);
531         osm_perfmgr_t *pm = (osm_perfmgr_t *) context;
532         monitored_node_t *mon_node = NULL;
533         uint32_t num_ports;
534         int port;
535
536         OSM_LOG_ENTER(pm->log);
537
538         if (cl_qmap_get(&pm->monitored_map, node_guid) ==
539             cl_qmap_end(&pm->monitored_map)) {
540
541                 if (pm->ignore_cas &&
542                     (node->node_info.node_type == IB_NODE_TYPE_CA))
543                         goto Exit;
544
545                 /* if not already in map add it */
546                 num_ports = osm_node_get_num_physp(node);
547                 mon_node = malloc(sizeof(*mon_node) +
548                                   sizeof(monitored_port_t) * num_ports);
549                 if (!mon_node) {
550                         OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5406: "
551                                 "malloc failed: not handling node %s"
552                                 "(GUID 0x%" PRIx64 ")\n", node->print_desc,
553                                 node_guid);
554                         goto Exit;
555                 }
556                 memset(mon_node, 0,
557                        sizeof(*mon_node) + sizeof(monitored_port_t) * num_ports);
558                 mon_node->guid = node_guid;
559                 mon_node->name = strdup(node->print_desc);
560                 mon_node->num_ports = num_ports;
561                 mon_node->node_type = node->node_info.node_type;
562                 /* check for enhanced switch port 0 */
563                 mon_node->esp0 = (node->sw &&
564                                   ib_switch_info_is_enhanced_port0(&node->sw->
565                                                                    switch_info));
566                 for (port = mon_node->esp0 ? 0 : 1; port < num_ports; port++) {
567                         monitored_port_t *mon_port = &mon_node->port[port];
568                         osm_physp_t *p_physp = &node->physp_table[port];
569                         osm_physp_t *p_remote_physp = p_physp->p_remote_physp;
570
571                         mon_port->orig_lid = 0;
572                         mon_port->valid = FALSE;
573                         if (osm_physp_is_valid(p_physp)) {
574                                 mon_port->orig_lid = get_base_lid(node, port);
575                                 mon_port->valid = TRUE;
576                         }
577                         mon_port->remote_valid = FALSE;
578                         mon_port->remote_name = NULL;
579                         if (p_remote_physp && osm_physp_is_valid(p_remote_physp)) {
580                                 osm_node_t *p_remote_node = p_remote_physp->p_node;
581                                 mon_port->remote_valid = TRUE;
582                                 mon_port->remote_guid = p_remote_node->node_info.node_guid;
583                                 mon_port->remote_name = strdup(p_remote_node->print_desc);
584                                 mon_port->remote_port = p_remote_physp->port_num;
585                         }
586                 }
587
588                 cl_qmap_insert(&pm->monitored_map, node_guid,
589                                (cl_map_item_t *) mon_node);
590         }
591
592 Exit:
593         OSM_LOG_EXIT(pm->log);
594 }
595
596 /**********************************************************************
597  * Form and send the ClassPortInfo MAD for a single port
598  **********************************************************************/
599 static ib_api_status_t perfmgr_send_cpi_mad(osm_perfmgr_t * pm,
600                                             ib_net16_t dest_lid,
601                                             ib_net32_t dest_qp,
602                                             uint16_t pkey_ix,
603                                             uint8_t port,
604                                             osm_madw_context_t * p_context,
605                                             uint8_t sl)
606 {
607         ib_api_status_t status = IB_SUCCESS;
608         osm_madw_t *p_madw = NULL;
609
610         OSM_LOG_ENTER(pm->log);
611
612         p_context->perfmgr_context.mad_attr_id = IB_MAD_ATTR_CLASS_PORT_INFO;
613         p_madw = perfmgr_build_mad(pm, dest_lid, sl, dest_qp,
614                                    pkey_ix, IB_MAD_METHOD_GET,
615                                    IB_MAD_ATTR_CLASS_PORT_INFO, p_context,
616                                    NULL);
617         if (p_madw == NULL)
618                 return IB_INSUFFICIENT_MEMORY;
619
620         status = perfmgr_send_mad(pm, p_madw);
621
622         OSM_LOG_EXIT(pm->log);
623         return status;
624 }
625
626 /**********************************************************************
627  * return if some form of PortCountersExtended (PCE || PCE NoIETF) are supported
628  **********************************************************************/
629 static inline boolean_t pce_supported(monitored_node_t *mon_node, uint8_t port)
630 {
631         monitored_port_t *mon_port = &(mon_node->port[port]);
632         return (mon_port->cpi_valid
633                 && (mon_port->cap_mask & IB_PM_EXT_WIDTH_SUPPORTED
634                 || mon_port->cap_mask & IB_PM_EXT_WIDTH_NOIETF_SUP));
635 }
636
637 /**********************************************************************
638  * return if CapMask.PortCountersXmitWaitSupported is set
639  **********************************************************************/
640 static inline boolean_t xmit_wait_supported(monitored_node_t *mon_node, uint8_t port)
641 {
642         monitored_port_t *mon_port = &(mon_node->port[port]);
643         return (mon_port->cpi_valid
644                 && (mon_port->cap_mask & IB_PM_PC_XMIT_WAIT_SUP));
645 }
646
647 /**********************************************************************
648  * return if "full" PortCountersExtended (IETF) is indicated
649  **********************************************************************/
650 static inline boolean_t ietf_supported(monitored_node_t *mon_node, uint8_t port)
651 {
652         monitored_port_t *mon_port = &(mon_node->port[port]);
653         return (mon_port->cpi_valid
654                 && (mon_port->cap_mask & IB_PM_EXT_WIDTH_SUPPORTED));
655 }
656
657 /**********************************************************************
658  * Form and send the PortCountersExtended MAD for a single port
659  **********************************************************************/
660 static ib_api_status_t perfmgr_send_pce_mad(osm_perfmgr_t * perfmgr,
661                                             ib_net16_t dest_lid,
662                                             ib_net32_t dest_qp,
663                                             uint16_t pkey_ix,
664                                             uint8_t port, uint8_t mad_method,
665                                             osm_madw_context_t * p_context,
666                                             uint8_t sl)
667 {
668         ib_api_status_t status = IB_SUCCESS;
669         ib_port_counters_ext_t *port_counter_ext = NULL;
670         ib_perfmgt_mad_t *pm_mad = NULL;
671         osm_madw_t *p_madw = NULL;
672
673         OSM_LOG_ENTER(perfmgr->log);
674
675         p_context->perfmgr_context.mad_attr_id = IB_MAD_ATTR_PORT_CNTRS_EXT;
676         p_madw = perfmgr_build_mad(perfmgr, dest_lid, sl, dest_qp, pkey_ix,
677                                 mad_method, IB_MAD_ATTR_PORT_CNTRS_EXT, p_context,
678                                 &pm_mad);
679         if (p_madw == NULL)
680                 return IB_INSUFFICIENT_MEMORY;
681
682         port_counter_ext = (ib_port_counters_ext_t *) & pm_mad->data;
683         memset(port_counter_ext, 0, sizeof(*port_counter_ext));
684         port_counter_ext->port_select = port;
685         port_counter_ext->counter_select = cl_hton16(0x00FF);
686
687         status = perfmgr_send_mad(perfmgr, p_madw);
688
689         OSM_LOG_EXIT(perfmgr->log);
690         return status;
691 }
692
693 /**********************************************************************
694  * query the Port Counters of all the nodes in the subnet
695  **********************************************************************/
696 static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context)
697 {
698         ib_api_status_t status = IB_SUCCESS;
699         osm_perfmgr_t *pm = context;
700         osm_node_t *node = NULL;
701         monitored_node_t *mon_node = (monitored_node_t *) p_map_item;
702         osm_madw_context_t mad_context;
703         uint64_t node_guid = 0;
704         ib_net32_t remote_qp;
705         uint8_t port, num_ports = 0;
706
707         OSM_LOG_ENTER(pm->log);
708
709         cl_plock_acquire(&pm->osm->lock);
710         node = osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid));
711         if (!node) {
712                 OSM_LOG(pm->log, OSM_LOG_ERROR,
713                         "ERR 5407: Node \"%s\" (guid 0x%" PRIx64
714                         ") no longer exists so removing from PerfMgr monitoring\n",
715                         mon_node->name, mon_node->guid);
716                 mark_for_removal(pm, mon_node);
717                 goto Exit;
718         }
719
720         num_ports = osm_node_get_num_physp(node);
721         node_guid = cl_ntoh64(node->node_info.node_guid);
722
723         /* make sure there is a database object ready to store this info */
724         if (perfmgr_db_create_entry(pm->db, node_guid, mon_node->esp0,
725                                     num_ports, node->print_desc) !=
726             PERFMGR_EVENT_DB_SUCCESS) {
727                 OSM_LOG(pm->log, OSM_LOG_ERROR,
728                         "ERR 5408: DB create entry failed for 0x%"
729                         PRIx64 " (%s) : %s\n", node_guid, node->print_desc,
730                         strerror(errno));
731                 goto Exit;
732         }
733
734         perfmgr_db_mark_active(pm->db, node_guid, TRUE);
735
736         /* issue the query for each port */
737         for (port = mon_node->esp0 ? 0 : 1; port < num_ports; port++) {
738                 ib_net16_t lid;
739
740                 if (!osm_node_get_physp_ptr(node, port))
741                         continue;
742
743                 if (!mon_node->port[port].valid)
744                         continue;
745
746                 lid = get_lid(node, port, mon_node);
747                 if (lid == 0) {
748                         OSM_LOG(pm->log, OSM_LOG_DEBUG, "WARN: node 0x%" PRIx64
749                                 " port %d (%s): port out of range, skipping\n",
750                                 cl_ntoh64(node->node_info.node_guid), port,
751                                 node->print_desc);
752                         continue;
753                 }
754
755                 remote_qp = get_qp(mon_node, port);
756
757                 mad_context.perfmgr_context.node_guid = node_guid;
758                 mad_context.perfmgr_context.port = port;
759                 mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_GET;
760
761                 if (pm->query_cpi && !mon_node->port[port].cpi_valid) {
762                         status = perfmgr_send_cpi_mad(pm, lid, remote_qp,
763                                                 mon_node->port[port].pkey_ix,
764                                                 port, &mad_context,
765                                                 0); /* FIXME SL != 0 */
766                         if (status != IB_SUCCESS)
767                                 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5410: "
768                                         "Failed to issue ClassPortInfo query "
769                                         "for node 0x%" PRIx64
770                                         " port %d (%s)\n",
771                                         node->node_info.node_guid, port,
772                                         node->print_desc);
773                         if (mon_node->node_type == IB_NODE_TYPE_SWITCH)
774                                 goto Exit; /* only need to issue 1 CPI query
775                                                 for switches */
776                 } else {
777
778 #ifdef ENABLE_OSM_PERF_MGR_PROFILE
779                         gettimeofday(&mad_context.perfmgr_context.query_start, NULL);
780 #endif
781                         OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%"
782                                 PRIx64 " port %d (lid %u) (%s)\n",
783                                 node_guid, port, cl_ntoh16(lid),
784                                 node->print_desc);
785                         status = perfmgr_send_pc_mad(pm, lid, remote_qp,
786                                                      mon_node->port[port].pkey_ix,
787                                                      port, IB_MAD_METHOD_GET,
788                                                      0xffff,
789                                                      1,
790                                                      &mad_context,
791                                                      0); /* FIXME SL != 0 */
792                         if (status != IB_SUCCESS)
793                                 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: "
794                                         "Failed to issue port counter query for node 0x%"
795                                         PRIx64 " port %d (%s)\n",
796                                         node->node_info.node_guid, port,
797                                         node->print_desc);
798
799                         if (pce_supported(mon_node, port)) {
800
801 #if ENABLE_OSM_PERF_MGR_PROFILE
802                                 gettimeofday(&mad_context.perfmgr_context.query_start, NULL);
803 #endif
804                                 status = perfmgr_send_pce_mad(pm, lid, remote_qp,
805                                                               mon_node->port[port].pkey_ix,
806                                                               port,
807                                                               IB_MAD_METHOD_GET,
808                                                               &mad_context,
809                                                               0); /* FIXME SL != 0 */
810                                 if (status != IB_SUCCESS)
811                                         OSM_LOG(pm->log, OSM_LOG_ERROR,
812                                                 "ERR 5417: Failed to issue "
813                                                 "port counter query for "
814                                                 "node 0x%" PRIx64 " port "
815                                                 "%d (%s)\n",
816                                                 node->node_info.node_guid,
817                                                 port,
818                                                 node->print_desc);
819                         }
820                 }
821         }
822 Exit:
823         cl_plock_release(&pm->osm->lock);
824         OSM_LOG_EXIT(pm->log);
825 }
826
827 /**********************************************************************
828  * Discovery stuff
829  * This code should not be here, but merged with main OpenSM
830  **********************************************************************/
/* Prototypes for functions defined outside this file; both are called
 * from perfmgr_discovery() below. */
831 extern int wait_for_pending_transactions(osm_stats_t * stats);
832 extern void osm_drop_mgr_process(IN osm_sm_t * sm);
833
834 static int sweep_hop_1(osm_sm_t * sm)
835 {
836         ib_api_status_t status = IB_SUCCESS;
837         osm_madw_context_t context;
838         osm_node_t *p_node;
839         osm_port_t *p_port;
840         osm_dr_path_t hop_1_path;
841         ib_net64_t port_guid;
842         uint8_t port_num;
843         uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX];
844         uint8_t num_ports;
845         osm_physp_t *p_ext_physp;
846
847         port_guid = sm->p_subn->sm_port_guid;
848
849         p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
850         if (!p_port) {
851                 OSM_LOG(sm->p_log, OSM_LOG_ERROR,
852                         "ERR 5481: No SM port object\n");
853                 return -1;
854         }
855
856         p_node = p_port->p_node;
857         port_num = ib_node_info_get_local_port_num(&p_node->node_info);
858
859         OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
860                 "Probing hop 1 on local port %u\n", port_num);
861
862         memset(path_array, 0, sizeof(path_array));
863         /* the hop_1 operations depend on the type of our node.
864          * Currently - legal nodes that can host SM are SW and CA */
865         switch (osm_node_get_type(p_node)) {
866         case IB_NODE_TYPE_CA:
867         case IB_NODE_TYPE_ROUTER:
868                 memset(&context, 0, sizeof(context));
869                 context.ni_context.node_guid = osm_node_get_node_guid(p_node);
870                 context.ni_context.port_num = port_num;
871
872                 path_array[1] = port_num;
873
874                 osm_dr_path_init(&hop_1_path, 1, path_array);
875                 CL_PLOCK_ACQUIRE(sm->p_lock);
876                 status = osm_req_get(sm, &hop_1_path, IB_MAD_ATTR_NODE_INFO, 0,
877                                      TRUE, 0, CL_DISP_MSGID_NONE, &context);
878                 CL_PLOCK_RELEASE(sm->p_lock);
879
880                 if (status != IB_SUCCESS)
881                         OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 5482: "
882                                 "Request for NodeInfo failed\n");
883                 break;
884
885         case IB_NODE_TYPE_SWITCH:
886                 /* Need to go over all the ports of the switch, and send a node_info
887                  * from them. This doesn't include the port 0 of the switch, which
888                  * hosts the SM.
889                  * Note: We'll send another switchInfo on port 0, since if no ports
890                  * are connected, we still want to get some response, and have the
891                  * subnet come up.
892                  */
893                 num_ports = osm_node_get_num_physp(p_node);
894                 for (port_num = 0; port_num < num_ports; port_num++) {
895                         /* go through the port only if the port is not DOWN */
896                         p_ext_physp = osm_node_get_physp_ptr(p_node, port_num);
897                         if (!p_ext_physp || ib_port_info_get_port_state
898                             (&p_ext_physp->port_info) <= IB_LINK_DOWN)
899                                 continue;
900
901                         memset(&context, 0, sizeof(context));
902                         context.ni_context.node_guid =
903                             osm_node_get_node_guid(p_node);
904                         context.ni_context.port_num = port_num;
905
906                         path_array[1] = port_num;
907
908                         osm_dr_path_init(&hop_1_path, 1, path_array);
909                         CL_PLOCK_ACQUIRE(sm->p_lock);
910                         status = osm_req_get(sm, &hop_1_path,
911                                              IB_MAD_ATTR_NODE_INFO, 0, TRUE, 0,
912                                              CL_DISP_MSGID_NONE, &context);
913                         CL_PLOCK_RELEASE(sm->p_lock);
914
915                         if (status != IB_SUCCESS)
916                                 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 5484: "
917                                         "Request for NodeInfo failed\n");
918                 }
919                 break;
920
921         default:
922                 OSM_LOG(sm->p_log, OSM_LOG_ERROR,
923                         "ERR 5483: Unknown node type %d\n",
924                         osm_node_get_type(p_node));
925         }
926
927         return status;
928 }
929
930 static unsigned is_sm_port_down(osm_sm_t * sm)
931 {
932         ib_net64_t port_guid;
933         osm_port_t *p_port;
934
935         port_guid = sm->p_subn->sm_port_guid;
936         if (port_guid == 0)
937                 return 1;
938
939         CL_PLOCK_ACQUIRE(sm->p_lock);
940         p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
941         if (!p_port) {
942                 CL_PLOCK_RELEASE(sm->p_lock);
943                 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 5485: "
944                         "SM port with GUID:%016" PRIx64 " is unknown\n",
945                         cl_ntoh64(port_guid));
946                 return 1;
947         }
948         CL_PLOCK_RELEASE(sm->p_lock);
949
950         if (p_port->p_node->sw &&
951             !ib_switch_info_is_enhanced_port0(&p_port->p_node->sw->switch_info))
952                 return 0;       /* base SP0 */
953
954         return osm_physp_get_port_state(p_port->p_physp) == IB_LINK_DOWN;
955 }
956
957 static int sweep_hop_0(osm_sm_t * sm)
958 {
959         ib_api_status_t status;
960         osm_dr_path_t dr_path;
961         osm_bind_handle_t h_bind;
962         uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX];
963
964         memset(path_array, 0, sizeof(path_array));
965
966         h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl);
967         if (h_bind == OSM_BIND_INVALID_HANDLE) {
968                 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "No bound ports\n");
969                 return -1;
970         }
971
972         osm_dr_path_init(&dr_path, 0, path_array);
973         CL_PLOCK_ACQUIRE(sm->p_lock);
974         status = osm_req_get(sm, &dr_path, IB_MAD_ATTR_NODE_INFO, 0,
975                              TRUE, 0, CL_DISP_MSGID_NONE, NULL);
976         CL_PLOCK_RELEASE(sm->p_lock);
977
978         if (status != IB_SUCCESS)
979                 OSM_LOG(sm->p_log, OSM_LOG_ERROR,
980                         "ERR 5486: Request for NodeInfo failed\n");
981
982         return status;
983 }
984
985 static void reset_node_count(cl_map_item_t * p_map_item, void *cxt)
986 {
987         osm_node_t *p_node = (osm_node_t *) p_map_item;
988         p_node->discovery_count = 0;
989
990         memset(p_node->physp_discovered, 0,
991                sizeof(uint8_t) * p_node->physp_tbl_size);
992 }
993
994 static void reset_port_count(cl_map_item_t * p_map_item, void *cxt)
995 {
996         osm_port_t *p_port = (osm_port_t *) p_map_item;
997         p_port->discovery_count = 0;
998 }
999
1000 static void reset_switch_count(cl_map_item_t * p_map_item, void *cxt)
1001 {
1002         osm_switch_t *p_sw = (osm_switch_t *) p_map_item;
1003         p_sw->need_update = 0;
1004 }
1005
1006 static int perfmgr_discovery(osm_opensm_t * osm)
1007 {
1008         int ret;
1009
1010         CL_PLOCK_ACQUIRE(&osm->lock);
1011         cl_qmap_apply_func(&osm->subn.node_guid_tbl, reset_node_count, NULL);
1012         cl_qmap_apply_func(&osm->subn.port_guid_tbl, reset_port_count, NULL);
1013         cl_qmap_apply_func(&osm->subn.sw_guid_tbl, reset_switch_count, NULL);
1014         CL_PLOCK_RELEASE(&osm->lock);
1015
1016         osm->subn.in_sweep_hop_0 = TRUE;
1017
1018         ret = sweep_hop_0(&osm->sm);
1019         if (ret)
1020                 goto _exit;
1021
1022         if (wait_for_pending_transactions(&osm->stats))
1023                 goto _exit;
1024
1025         if (is_sm_port_down(&osm->sm)) {
1026                 OSM_LOG(&osm->log, OSM_LOG_VERBOSE, "SM port is down\n");
1027                 goto _drop;
1028         }
1029
1030         osm->subn.in_sweep_hop_0 = FALSE;
1031
1032         ret = sweep_hop_1(&osm->sm);
1033         if (ret)
1034                 goto _exit;
1035
1036         if (wait_for_pending_transactions(&osm->stats))
1037                 goto _exit;
1038
1039 _drop:
1040         osm_drop_mgr_process(&osm->sm);
1041
1042 _exit:
1043         return ret;
1044 }
1045
1046 /**********************************************************************
1047  * Main PerfMgr processor - query the performance counters
1048  **********************************************************************/
1049 void osm_perfmgr_process(osm_perfmgr_t * pm)
1050 {
1051 #ifdef ENABLE_OSM_PERF_MGR_PROFILE
1052         struct timeval before, after;
1053 #endif
1054
1055         if (pm->state != PERFMGR_STATE_ENABLED)
1056                 return;
1057
1058         cl_spinlock_acquire(&pm->lock);
1059         if (pm->sweep_state == PERFMGR_SWEEP_ACTIVE ||
1060             pm->sweep_state == PERFMGR_SWEEP_SUSPENDED ||
1061             pm->sweep_state == PERFMGR_SWEEP_POST_PROCESSING) {
1062                 cl_spinlock_release(&pm->lock);
1063                 OSM_LOG(pm->log, OSM_LOG_INFO,
1064                         "PM sweep state %d, skipping sweep\n",
1065                         pm->sweep_state);
1066                 return;
1067         }
1068
1069         pm->sweep_state = PERFMGR_SWEEP_ACTIVE;
1070         cl_spinlock_release(&pm->lock);
1071
1072         if (pm->subn->sm_state == IB_SMINFO_STATE_STANDBY ||
1073             pm->subn->sm_state == IB_SMINFO_STATE_NOTACTIVE)
1074                 perfmgr_discovery(pm->subn->p_osm);
1075
1076         /* if redirection enabled, determine local port */
1077         if (pm->subn->opt.perfmgr_redir && pm->local_port == -1) {
1078                 osm_node_t *p_node;
1079                 osm_port_t *p_port;
1080
1081                 CL_PLOCK_ACQUIRE(pm->sm->p_lock);
1082                 p_port = osm_get_port_by_guid(pm->subn, pm->port_guid);
1083                 if (p_port) {
1084                         p_node = p_port->p_node;
1085                         CL_ASSERT(p_node);
1086                         pm->local_port =
1087                             ib_node_info_get_local_port_num(&p_node->node_info);
1088                 } else
1089                         OSM_LOG(pm->log, OSM_LOG_ERROR,
1090                                 "ERR 5487: No PerfMgr port object for "
1091                                 "port GUID 0x%" PRIx64 "\n",
1092                                 cl_ntoh64(pm->port_guid));
1093                 CL_PLOCK_RELEASE(pm->sm->p_lock);
1094         }
1095
1096 #ifdef ENABLE_OSM_PERF_MGR_PROFILE
1097         gettimeofday(&before, NULL);
1098 #endif
1099         /* With the global lock held, collect the node guids */
1100         /* FIXME we should be able to track SA notices
1101          * and not have to sweep the node_guid_tbl each pass
1102          */
1103         OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Gathering PerfMgr stats\n");
1104         cl_plock_acquire(&pm->osm->lock);
1105         cl_qmap_apply_func(&pm->subn->node_guid_tbl, collect_guids, pm);
1106         cl_plock_release(&pm->osm->lock);
1107
1108         /* then for each node query their counters */
1109         cl_qmap_apply_func(&pm->monitored_map, perfmgr_query_counters, pm);
1110
1111         /* clean out any nodes found to be removed during the sweep */
1112         remove_marked_nodes(pm);
1113
1114 #ifdef ENABLE_OSM_PERF_MGR_PROFILE
1115         gettimeofday(&after, NULL);
1116         diff_time(&before, &after, &after);
1117         osm_log_v2(pm->log, OSM_LOG_INFO, FILE_ID,
1118                    "PerfMgr total sweep time : %ld.%06ld s\n"
1119                    "        fastest mad      : %g us\n"
1120                    "        slowest mad      : %g us\n"
1121                    "        average mad      : %g us\n",
1122                    after.tv_sec, after.tv_usec, perfmgr_mad_stats.fastest_us,
1123                    perfmgr_mad_stats.slowest_us, perfmgr_mad_stats.avg_us);
1124         clear_mad_stats();
1125 #endif
1126
1127         cl_spinlock_acquire(&pm->lock);
1128         pm->sweep_state = PERFMGR_SWEEP_SLEEP;
1129         cl_spinlock_release(&pm->lock);
1130 }
1131
1132 /**********************************************************************
1133  * PerfMgr timer - loop continuously and signal SM to run PerfMgr
1134  * processor if enabled
1135  **********************************************************************/
1136 static void perfmgr_sweep(void *arg)
1137 {
1138         osm_perfmgr_t *pm = arg;
1139
1140         osm_sm_signal(pm->sm, OSM_SIGNAL_PERFMGR_SWEEP);
1141         cl_timer_start(&pm->sweep_timer, pm->sweep_time_s * 1000);
1142 }
1143
1144 void osm_perfmgr_shutdown(osm_perfmgr_t * pm)
1145 {
1146         OSM_LOG_ENTER(pm->log);
1147         cl_timer_stop(&pm->sweep_timer);
1148         cl_disp_unregister(pm->pc_disp_h);
1149         perfmgr_mad_unbind(pm);
1150         OSM_LOG_EXIT(pm->log);
1151 }
1152
1153 void osm_perfmgr_destroy(osm_perfmgr_t * pm)
1154 {
1155         OSM_LOG_ENTER(pm->log);
1156         perfmgr_db_destroy(pm->db);
1157         cl_timer_destroy(&pm->sweep_timer);
1158         OSM_LOG_EXIT(pm->log);
1159 }
1160
1161 /**********************************************************************
1162  * Detect if someone else on the network could have cleared the counters
1163  * without us knowing.  This is easy to detect because the counters never
1164  * wrap but are "sticky".
1165  *
1166  * The one time this will not work is if the port is getting errors fast
1167  * enough to have the reading overtake the previous reading.  In this case,
1168  * counters will be missed.
1169  **********************************************************************/
1170 static void perfmgr_check_oob_clear(osm_perfmgr_t * pm,
1171                                     monitored_node_t * mon_node, uint8_t port,
1172                                     perfmgr_db_err_reading_t * cr)
1173 {
1174         perfmgr_db_err_reading_t prev_err;
1175
1176         if (perfmgr_db_get_prev_err(pm->db, mon_node->guid, port, &prev_err)
1177             != PERFMGR_EVENT_DB_SUCCESS) {
1178                 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Failed to find previous "
1179                         "error reading for %s (guid 0x%" PRIx64 ") port %u\n",
1180                         mon_node->name, mon_node->guid, port);
1181                 return;
1182         }
1183
1184         OSM_LOG(pm->log, OSM_LOG_DEBUG,
1185                 "Errors vs previous node %s (0x%" PRIx64 ") port %u\n"
1186                 "SE:   %"PRIu64" ?< %"PRIu64"\n"
1187                 "LE:   %"PRIu64" ?< %"PRIu64"\n"
1188                 "LD:   %"PRIu64" ?< %"PRIu64"\n"
1189                 "RE:   %"PRIu64" ?< %"PRIu64"\n"
1190                 "RPE:  %"PRIu64" ?< %"PRIu64"\n"
1191                 "SRE:  %"PRIu64" ?< %"PRIu64"\n"
1192                 "XD:   %"PRIu64" ?< %"PRIu64"\n"
1193                 "XCE:  %"PRIu64" ?< %"PRIu64"\n"
1194                 "RCE:  %"PRIu64" ?< %"PRIu64"\n"
1195                 "LI:   %"PRIu64" ?< %"PRIu64"\n"
1196                 "BO:   %"PRIu64" ?< %"PRIu64"\n"
1197                 "VL15: %"PRIu64" ?< %"PRIu64"\n"
1198                 "XW:   %"PRIu64" ?< %"PRIu64"\n"
1199                 ,
1200                 mon_node->name, mon_node->guid, port,
1201                 cr->symbol_err_cnt, prev_err.symbol_err_cnt,
1202                 cr->link_err_recover, prev_err.link_err_recover,
1203                 cr->link_downed, prev_err.link_downed,
1204                 cr->rcv_err, prev_err.rcv_err,
1205                 cr->rcv_rem_phys_err, prev_err.rcv_rem_phys_err,
1206                 cr->rcv_switch_relay_err, prev_err.rcv_switch_relay_err,
1207                 cr->xmit_discards, prev_err.xmit_discards,
1208                 cr->xmit_constraint_err, prev_err.xmit_constraint_err,
1209                 cr->rcv_constraint_err, prev_err.rcv_constraint_err,
1210                 cr->link_integrity, prev_err.link_integrity,
1211                 cr->buffer_overrun, prev_err.buffer_overrun,
1212                 cr->vl15_dropped, prev_err.vl15_dropped,
1213                 cr->xmit_wait, prev_err.xmit_wait);
1214
1215         if (cr->symbol_err_cnt < prev_err.symbol_err_cnt ||
1216             cr->link_err_recover < prev_err.link_err_recover ||
1217             cr->link_downed < prev_err.link_downed ||
1218             cr->rcv_err < prev_err.rcv_err ||
1219             cr->rcv_rem_phys_err < prev_err.rcv_rem_phys_err ||
1220             cr->rcv_switch_relay_err < prev_err.rcv_switch_relay_err ||
1221             cr->xmit_discards < prev_err.xmit_discards ||
1222             cr->xmit_constraint_err < prev_err.xmit_constraint_err ||
1223             cr->rcv_constraint_err < prev_err.rcv_constraint_err ||
1224             cr->link_integrity < prev_err.link_integrity ||
1225             cr->buffer_overrun < prev_err.buffer_overrun ||
1226             cr->vl15_dropped < prev_err.vl15_dropped ||
1227             cr->xmit_wait < prev_err.xmit_wait) {
1228                 OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 540A: "
1229                         "Detected an out of band error clear "
1230                         "on %s (0x%" PRIx64 ") port %u\n",
1231                         mon_node->name, mon_node->guid, port);
1232                 perfmgr_db_clear_prev_err(pm->db, mon_node->guid, port);
1233         }
1234 }
1235
1236 /**********************************************************************
1237  * Return 1 if the value is "close" to overflowing
1238  * "close" is defined at 25% for now
1239  **********************************************************************/
/* 4-bit counter check: returns 1 once val has reached 10, else 0. */
static int counter_overflow_4(uint8_t val)
{
	if (val < 10)
		return 0;
	return 1;
}
1244
/* 8-bit counter check: returns 1 when val is within the top quarter of
 * the 8-bit range, else 0. */
static int counter_overflow_8(uint8_t val)
{
	const int threshold = UINT8_MAX - (UINT8_MAX / 4);

	return val >= threshold ? 1 : 0;
}
1249
1250 static int counter_overflow_16(ib_net16_t val)
1251 {
1252         return (cl_ntoh16(val) >= (UINT16_MAX - (UINT16_MAX / 4)));
1253 }
1254
1255 static int counter_overflow_32(ib_net32_t val)
1256 {
1257         return (cl_ntoh32(val) >= (UINT32_MAX - (UINT32_MAX / 4)));
1258 }
1259
1260 static int counter_overflow_64(ib_net64_t val)
1261 {
1262         return (cl_ntoh64(val) >= (UINT64_MAX - (UINT64_MAX / 4)));
1263 }
1264
1265 /**********************************************************************
1266  * Check if the port counters have overflowed and if so issue a clear
1267  * MAD to the port
1268  **********************************************************************/
1269 static void perfmgr_check_overflow(osm_perfmgr_t * pm,
1270                                    monitored_node_t * mon_node, int16_t pkey_ix,
1271                                    uint8_t port, ib_port_counters_t * pc,
1272                                    boolean_t xmit_wait_sup)
1273 {
1274         osm_madw_context_t mad_context;
1275         ib_api_status_t status;
1276         ib_net32_t remote_qp;
1277         uint16_t counter_select;
1278         uint8_t counter_select2;
1279
1280         OSM_LOG_ENTER(pm->log);
1281
1282         if (counter_overflow_16(pc->symbol_err_cnt) ||
1283             counter_overflow_8(pc->link_err_recover) ||
1284             counter_overflow_8(pc->link_downed) ||
1285             counter_overflow_16(pc->rcv_err) ||
1286             counter_overflow_16(pc->rcv_rem_phys_err) ||
1287             counter_overflow_16(pc->rcv_switch_relay_err) ||
1288             counter_overflow_16(pc->xmit_discards) ||
1289             counter_overflow_8(pc->xmit_constraint_err) ||
1290             counter_overflow_8(pc->rcv_constraint_err) ||
1291             counter_overflow_4(PC_LINK_INT(pc->link_int_buffer_overrun)) ||
1292             counter_overflow_4(PC_BUF_OVERRUN(pc->link_int_buffer_overrun)) ||
1293             counter_overflow_16(pc->vl15_dropped) ||
1294             (xmit_wait_sup && counter_overflow_32(pc->xmit_wait)) ||
1295             (!pce_supported(mon_node, port) &&
1296             (counter_overflow_32(pc->xmit_data) ||
1297              counter_overflow_32(pc->rcv_data) ||
1298              counter_overflow_32(pc->xmit_pkts) ||
1299              counter_overflow_32(pc->rcv_pkts)))) {
1300                 osm_node_t *p_node = NULL;
1301                 ib_net16_t lid = 0;
1302
1303                 if (!mon_node->port[port].valid)
1304                         goto Exit;
1305
1306                 osm_log_v2(pm->log, OSM_LOG_VERBOSE, FILE_ID,
1307                            "PerfMgr: Counter overflow: %s (0x%" PRIx64
1308                            ") port %d; clearing counters\n",
1309                            mon_node->name, mon_node->guid, port);
1310
1311                 cl_plock_acquire(&pm->osm->lock);
1312                 p_node =
1313                     osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid));
1314                 lid = get_lid(p_node, port, mon_node);
1315                 cl_plock_release(&pm->osm->lock);
1316                 if (lid == 0) {
1317                         OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 540C: "
1318                                 "Failed to clear counters for %s (0x%"
1319                                 PRIx64 ") port %d; failed to get lid\n",
1320                                 mon_node->name, mon_node->guid, port);
1321                         goto Exit;
1322                 }
1323
1324                 remote_qp = get_qp(NULL, port);
1325
1326                 mad_context.perfmgr_context.node_guid = mon_node->guid;
1327                 mad_context.perfmgr_context.port = port;
1328                 mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_SET;
1329
1330                 /* apparently some HW uses the same counters for the 32 and 64
1331                  * bit versions and a clear of them in the PortCounters
1332                  * attribute also clears the ExtendedPortCounters equivalant
1333                  * counters
1334                  */
1335                 if (pce_supported(mon_node, port))
1336                         counter_select = 0x0fff;
1337                 else
1338                         counter_select = 0xffff;
1339
1340                 if (xmit_wait_sup)
1341                         counter_select2 = 1;
1342                 else
1343                         counter_select2 = 0;
1344
1345                 status = perfmgr_send_pc_mad(pm, lid, remote_qp, pkey_ix,
1346                                              port, IB_MAD_METHOD_SET,
1347                                              counter_select,
1348                                              counter_select2,
1349                                              &mad_context,
1350                                              0); /* FIXME SL != 0 */
1351                 if (status != IB_SUCCESS)
1352                         OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5411: "
1353                                 "Failed to send clear counters MAD for %s (0x%"
1354                                 PRIx64 ") port %d\n",
1355                                 mon_node->name, mon_node->guid, port);
1356
1357                 perfmgr_db_clear_prev_err(pm->db, mon_node->guid, port);
1358                 if (!pce_supported(mon_node, port))
1359                         perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port);
1360         }
1361
1362 Exit:
1363         OSM_LOG_EXIT(pm->log);
1364 }
1365
1366 /**********************************************************************
1367  * Check if the port counters have overflowed and if so issue a clear
1368  * MAD to the port
1369  **********************************************************************/
1370 static void perfmgr_check_pce_overflow(osm_perfmgr_t * pm,
1371                                        monitored_node_t * mon_node,
1372                                        int16_t pkey_ix,
1373                                        uint8_t port,
1374                                        ib_port_counters_ext_t * pc)
1375 {
1376         osm_madw_context_t mad_context;
1377         ib_api_status_t status;
1378         ib_net32_t remote_qp;
1379
1380         OSM_LOG_ENTER(pm->log);
1381
1382         if (counter_overflow_64(pc->xmit_data) ||
1383             counter_overflow_64(pc->rcv_data) ||
1384             counter_overflow_64(pc->xmit_pkts) ||
1385             counter_overflow_64(pc->rcv_pkts) ||
1386             (ietf_supported(mon_node, port) &&
1387             (counter_overflow_64(pc->unicast_xmit_pkts) ||
1388             counter_overflow_64(pc->unicast_rcv_pkts) ||
1389             counter_overflow_64(pc->multicast_xmit_pkts) ||
1390             counter_overflow_64(pc->multicast_rcv_pkts)))) {
1391                 osm_node_t *p_node = NULL;
1392                 ib_net16_t lid = 0;
1393
1394                 if (!mon_node->port[port].valid)
1395                         goto Exit;
1396
1397                 osm_log(pm->log, OSM_LOG_VERBOSE,
1398                         "PerfMgr: PortCountersExtended overflow: %s (0x%"
1399                         PRIx64 ") port %d; clearing counters\n",
1400                         mon_node->name, mon_node->guid, port);
1401
1402                 cl_plock_acquire(&pm->osm->lock);
1403                 p_node =
1404                     osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid));
1405                 lid = get_lid(p_node, port, mon_node);
1406                 cl_plock_release(&pm->osm->lock);
1407                 if (lid == 0) {
1408                         OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5418: "
1409                                 "Failed to clear counters for %s (0x%"
1410                                 PRIx64 ") port %d; failed to get lid\n",
1411                                 mon_node->name, mon_node->guid, port);
1412                         goto Exit;
1413                 }
1414
1415                 remote_qp = get_qp(NULL, port);
1416
1417                 mad_context.perfmgr_context.node_guid = mon_node->guid;
1418                 mad_context.perfmgr_context.port = port;
1419                 mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_SET;
1420                 /* clear port counters */
1421                 status = perfmgr_send_pce_mad(pm, lid, remote_qp, pkey_ix,
1422                                               port, IB_MAD_METHOD_SET,
1423                                               &mad_context,
1424                                               0); /* FIXME SL != 0 */
1425                 if (status != IB_SUCCESS)
1426                         OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5419: "
1427                                 "Failed to send clear counters MAD for %s (0x%"
1428                                 PRIx64 ") port %d\n",
1429                                 mon_node->name, mon_node->guid, port);
1430
1431                 perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port);
1432         }
1433
1434 Exit:
1435         OSM_LOG_EXIT(pm->log);
1436 }
1437
1438 /**********************************************************************
1439  * Check values for logging of errors
1440  **********************************************************************/
1441 static void perfmgr_log_errors(osm_perfmgr_t * pm,
1442                                monitored_node_t * mon_node, uint8_t port,
1443                                perfmgr_db_err_reading_t * reading)
1444 {
1445         perfmgr_db_err_reading_t prev_read;
1446         perfmgr_db_err_t err =
1447             perfmgr_db_get_prev_err(pm->db, mon_node->guid, port, &prev_read);
1448         uint64_t cur, prev;
1449
1450         if (err != PERFMGR_EVENT_DB_SUCCESS) {
1451                 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Failed to find previous "
1452                         "reading for %s (0x%" PRIx64 ") port %u\n",
1453                         mon_node->name, mon_node->guid, port);
1454                 return;
1455         }
1456
1457 #define LOG_ERR_CNT(errname, errnum, counter_name) \
1458         if (reading->counter_name > prev_read.counter_name) { \
1459                 if (mon_node->port[port].remote_valid == TRUE) \
1460                         OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR %s: " \
1461                                 "%s : %" PRIu64 " : node " \
1462                                 "\"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u " \
1463                                 "connected to \"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u\n", \
1464                                 errnum, errname, \
1465                                 reading->counter_name - prev_read.counter_name, \
1466                                 mon_node->name, mon_node->guid, port, \
1467                                 mon_node->port[port].remote_name, \
1468                                 mon_node->port[port].remote_guid, \
1469                                 mon_node->port[port].remote_port); \
1470                 else \
1471                         OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR %s: " \
1472                                 "%s : %" PRIu64 " : node " \
1473                                 "\"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u\n", \
1474                                 errnum, errname, \
1475                                 reading->counter_name - prev_read.counter_name, \
1476                                 mon_node->name, mon_node->guid, port); \
1477         }
1478
1479         LOG_ERR_CNT("SymbolErrorCounter",           "5431", symbol_err_cnt);
1480         LOG_ERR_CNT("LinkErrorRecoveryCounter",     "5432", link_err_recover);
1481         LOG_ERR_CNT("LinkDownedCounter",            "5433", link_downed);
1482         LOG_ERR_CNT("PortRcvErrors",                "5434", rcv_err);
1483         LOG_ERR_CNT("PortRcvRemotePhysicalErrors",  "5435", rcv_rem_phys_err);
1484         LOG_ERR_CNT("PortRcvSwitchRelayErrors",     "5436", rcv_switch_relay_err);
1485         LOG_ERR_CNT("PortXmitDiscards",             "5437", xmit_discards);
1486         LOG_ERR_CNT("PortXmitConstraintErrors",     "5438", xmit_constraint_err);
1487         LOG_ERR_CNT("PortRcvConstraintErrors",      "5439", rcv_constraint_err);
1488         LOG_ERR_CNT("LocalLinkIntegrityErrors",     "543A", link_integrity);
1489         LOG_ERR_CNT("ExcessiveBufferOverrunErrors", "543B", buffer_overrun);
1490         LOG_ERR_CNT("VL15Dropped",                  "543C", vl15_dropped);
1491
1492         cur = reading->xmit_wait;
1493         prev = prev_read.xmit_wait;
1494         if (pm->xmit_wait_log && cur > prev &&
1495             (cur - prev) >= pm->xmit_wait_threshold) {
1496                 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 543D: XmitWait : %" PRIu64
1497                         " : node \"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u\n",
1498                         cur - prev, mon_node->name, mon_node->guid, port);
1499         }
1500 }
1501
1502 static int16_t validate_redir_pkey(osm_perfmgr_t *pm, ib_net16_t pkey)
1503 {
1504         int16_t pkey_ix = -1;
1505         osm_port_t *p_port;
1506         osm_pkey_tbl_t *p_pkey_tbl;
1507         ib_net16_t *p_orig_pkey;
1508         uint16_t block;
1509         uint8_t index;
1510
1511         OSM_LOG_ENTER(pm->log);
1512
1513         CL_PLOCK_ACQUIRE(pm->sm->p_lock);
1514         p_port = osm_get_port_by_guid(pm->subn, pm->port_guid);
1515         if (!p_port) {
1516                 CL_PLOCK_RELEASE(pm->sm->p_lock);
1517                 OSM_LOG(pm->log, OSM_LOG_ERROR,
1518                         "ERR 541E: No PerfMgr port object\n");
1519                 goto Exit;
1520         }
1521         if (p_port->p_physp && osm_physp_is_valid(p_port->p_physp)) {
1522                 p_pkey_tbl = &p_port->p_physp->pkeys;
1523                 if (!p_pkey_tbl) {
1524                         CL_PLOCK_RELEASE(pm->sm->p_lock);
1525                         OSM_LOG(pm->log, OSM_LOG_VERBOSE,
1526                                 "No PKey table found for PerfMgr port\n");
1527                         goto Exit;
1528                 }
1529                 p_orig_pkey = cl_map_get(&p_pkey_tbl->keys,
1530                                          ib_pkey_get_base(pkey));
1531                 if (!p_orig_pkey) {
1532                         CL_PLOCK_RELEASE(pm->sm->p_lock);
1533                         OSM_LOG(pm->log, OSM_LOG_VERBOSE,
1534                                 "PKey 0x%x not found for PerfMgr port\n",
1535                                 cl_ntoh16(pkey));
1536                         goto Exit;
1537                 }
1538                 if (osm_pkey_tbl_get_block_and_idx(p_pkey_tbl, p_orig_pkey,
1539                                                    &block, &index) == IB_SUCCESS) {
1540                         CL_PLOCK_RELEASE(pm->sm->p_lock);
1541                         pkey_ix = block * IB_NUM_PKEY_ELEMENTS_IN_BLOCK + index;
1542                 } else {
1543                         CL_PLOCK_RELEASE(pm->sm->p_lock);
1544                         OSM_LOG(pm->log, OSM_LOG_ERROR,
1545                                 "ERR 541F: Failed to obtain P_Key 0x%04x "
1546                                 "block and index for PerfMgr port\n",
1547                                 cl_ntoh16(pkey));
1548                 }
1549         } else {
1550                 CL_PLOCK_RELEASE(pm->sm->p_lock);
1551                 OSM_LOG(pm->log, OSM_LOG_ERROR,
1552                         "ERR 5420: Local PerfMgt port physp invalid\n");
1553         }
1554
1555 Exit:
1556         OSM_LOG_EXIT(pm->log);
1557         return pkey_ix;
1558 }
1559
1560 static boolean_t handle_redirect(osm_perfmgr_t *pm,
1561                             ib_class_port_info_t *cpi,
1562                             monitored_node_t *p_mon_node,
1563                             uint8_t port,
1564                             osm_madw_context_t *mad_context)
1565 {
1566         char gid_str[INET6_ADDRSTRLEN];
1567         ib_api_status_t status;
1568         boolean_t valid = TRUE;
1569         int16_t pkey_ix = 0;
1570         uint8_t mad_method;
1571
1572         OSM_LOG(pm->log, OSM_LOG_VERBOSE,
1573                 "Redirection to LID %u GID %s QP 0x%x received\n",
1574                 cl_ntoh16(cpi->redir_lid),
1575                 inet_ntop(AF_INET6, cpi->redir_gid.raw, gid_str,
1576                           sizeof gid_str), cl_ntoh32(cpi->redir_qp));
1577
1578         if (!pm->subn->opt.perfmgr_redir) {
1579                 OSM_LOG(pm->log, OSM_LOG_VERBOSE,
1580                         "Redirection requested but disabled\n");
1581                 valid = FALSE;
1582         }
1583
1584         /* valid redirection ? */
1585         if (cpi->redir_lid == 0) {
1586                 if (!ib_gid_is_notzero(&cpi->redir_gid)) {
1587                         OSM_LOG(pm->log, OSM_LOG_VERBOSE,
1588                                 "Invalid redirection "
1589                                 "(both redirect LID and GID are zero)\n");
1590                         valid = FALSE;
1591                 }
1592         }
1593         if (cpi->redir_qp == 0) {
1594                 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectQP\n");
1595                 valid = FALSE;
1596         }
1597         if (cpi->redir_pkey == 0) {
1598                 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectP_Key\n");
1599                 valid = FALSE;
1600         }
1601         if (cpi->redir_qkey != IB_QP1_WELL_KNOWN_Q_KEY) {
1602                 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectQ_Key\n");
1603                 valid = FALSE;
1604         }
1605
1606         pkey_ix = validate_redir_pkey(pm, cpi->redir_pkey);
1607         if (pkey_ix == -1) {
1608                 OSM_LOG(pm->log, OSM_LOG_VERBOSE,
1609                         "Index for Pkey 0x%x not found\n",
1610                         cl_ntoh16(cpi->redir_pkey));
1611                 valid = FALSE;
1612         }
1613
1614         if (cpi->redir_lid == 0) {
1615                 /* GID redirection: get PathRecord information */
1616                 OSM_LOG(pm->log, OSM_LOG_VERBOSE,
1617                         "GID redirection not currently supported\n");
1618                 goto Exit;
1619         }
1620
1621         if (!valid)
1622                 goto Exit;
1623
1624         /* LID redirection support (easier than GID redirection) */
1625         cl_plock_acquire(&pm->osm->lock);
1626         p_mon_node->port[port].redirection = TRUE;
1627         p_mon_node->port[port].valid = valid;
1628         memcpy(&p_mon_node->port[port].gid, &cpi->redir_gid,
1629                sizeof(ib_gid_t));
1630         p_mon_node->port[port].lid = cpi->redir_lid;
1631         p_mon_node->port[port].qp = cpi->redir_qp;
1632         p_mon_node->port[port].pkey = cpi->redir_pkey;
1633         if (pkey_ix != -1)
1634                 p_mon_node->port[port].pkey_ix = pkey_ix;
1635         cl_plock_release(&pm->osm->lock);
1636
1637         /* either */
1638         if (pm->query_cpi)
1639         {
1640                 /* issue a CPI query to the redirected location */
1641                 mad_method = IB_MAD_METHOD_GET;
1642                 p_mon_node->port[port].cpi_valid = FALSE;
1643                 status = perfmgr_send_cpi_mad(pm, cpi->redir_lid,
1644                                                 cpi->redir_qp, pkey_ix,
1645                                                 port, mad_context,
1646                                                 0); /* FIXME SL != 0 */
1647         } else {
1648                 /* reissue the original query to the redirected location */
1649                 uint8_t counter_select2;
1650
1651                 if (xmit_wait_supported(p_mon_node, port))
1652                         counter_select2 = 1;
1653                 else
1654                         counter_select2 = 0;
1655
1656                 mad_method = mad_context->perfmgr_context.mad_method;
1657                 if (mad_context->perfmgr_context.mad_attr_id
1658                     == IB_MAD_ATTR_PORT_CNTRS) {
1659                         status = perfmgr_send_pc_mad(pm, cpi->redir_lid,
1660                                                      cpi->redir_qp,
1661                                                      pkey_ix, port,
1662                                                      mad_method,
1663                                                      0xffff,
1664                                                      counter_select2,
1665                                                      mad_context,
1666                                                      0); /* FIXME SL != 0 */
1667                 } else {
1668                         status = perfmgr_send_pce_mad(pm, cpi->redir_lid,
1669                                                       cpi->redir_qp,
1670                                                       pkey_ix, port,
1671                                                       mad_method,
1672                                                       mad_context,
1673                                                       0); /* FIXME SL != 0 */
1674                 }
1675         }
1676         if (status != IB_SUCCESS)
1677                 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5414: "
1678                         "Failed to send redirected MAD "
1679                         "with method 0x%x for node %s "
1680                         "(NodeGuid 0x%" PRIx64 ") port %d\n",
1681                         mad_method, p_mon_node->name, p_mon_node->guid, port);
1682 Exit:
1683         return (valid);
1684 }
1685
1686 /**********************************************************************
1687  * Detect if someone else on the network could have cleared the counters
1688  * without us knowing.  This is easy to detect because the counters never
1689  * wrap but are "sticky" PortCountersExtended version.
1690  *
1691  * The one time this will not work is if the port is getting errors fast
1692  * enough to have the reading overtake the previous reading.  In this case,
1693  * counters will be missed.
1694  **********************************************************************/
1695 static void perfmgr_check_data_cnt_oob_clear(osm_perfmgr_t * pm,
1696                                         monitored_node_t * mon_node,
1697                                         uint8_t port,
1698                                         perfmgr_db_data_cnt_reading_t * dc)
1699 {
1700         perfmgr_db_data_cnt_reading_t prev_dc;
1701
1702         if (perfmgr_db_get_prev_dc(pm->db, mon_node->guid, port, &prev_dc)
1703             != PERFMGR_EVENT_DB_SUCCESS) {
1704                 OSM_LOG(pm->log, OSM_LOG_VERBOSE,
1705                         "Failed to find previous data count "
1706                         "reading for %s (0x%" PRIx64 ") port %u\n",
1707                         mon_node->name, mon_node->guid, port);
1708                 return;
1709         }
1710
1711         OSM_LOG(pm->log, OSM_LOG_DEBUG,
1712                 "Data vs previous node %s (0x%" PRIx64 ") port %u\n"
1713                 "TX:    %"PRIu64" ?< %"PRIu64"\n"
1714                 "RX:    %"PRIu64" ?< %"PRIu64"\n"
1715                 "TXP:   %"PRIu64" ?< %"PRIu64"\n"
1716                 "RXP:   %"PRIu64" ?< %"PRIu64"\n"
1717                 "UTXP:  %"PRIu64" ?< %"PRIu64"\n"
1718                 "URXP:  %"PRIu64" ?< %"PRIu64"\n"
1719                 "MTXP:  %"PRIu64" ?< %"PRIu64"\n"
1720                 "MRXP:  %"PRIu64" ?< %"PRIu64"\n"
1721                 ,
1722                 mon_node->name, mon_node->guid, port,
1723                 dc->xmit_data, prev_dc.xmit_data,
1724                 dc->rcv_data, prev_dc.rcv_data,
1725                 dc->xmit_pkts, prev_dc.xmit_pkts,
1726                 dc->rcv_pkts, prev_dc.rcv_pkts,
1727                 dc->unicast_xmit_pkts, prev_dc.unicast_xmit_pkts,
1728                 dc->unicast_rcv_pkts, prev_dc.unicast_rcv_pkts,
1729                 dc->multicast_xmit_pkts, prev_dc.multicast_xmit_pkts,
1730                 dc->multicast_rcv_pkts, prev_dc.multicast_rcv_pkts);
1731
1732         if (dc->xmit_data < prev_dc.xmit_data ||
1733             dc->rcv_data < prev_dc.rcv_data ||
1734             dc->xmit_pkts < prev_dc.xmit_pkts ||
1735             dc->rcv_pkts < prev_dc.rcv_pkts ||
1736             (ietf_supported(mon_node, port) &&
1737             (dc->unicast_xmit_pkts < prev_dc.unicast_xmit_pkts ||
1738             dc->unicast_rcv_pkts < prev_dc.unicast_rcv_pkts ||
1739             dc->multicast_xmit_pkts < prev_dc.multicast_xmit_pkts ||
1740             dc->multicast_rcv_pkts < prev_dc.multicast_rcv_pkts))) {
1741                 OSM_LOG(pm->log, OSM_LOG_ERROR,
1742                         "PerfMgr: ERR 540B: Detected an out of band data counter "
1743                         "clear on node %s (0x%" PRIx64 ") port %u\n",
1744                         mon_node->name, mon_node->guid, port);
1745
1746                 perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port);
1747         }
1748 }
1749
1750 /**********************************************************************
1751  * The dispatcher uses a thread pool which will call this function when
1752  * there is a thread available to process the mad received on the wire
1753  **********************************************************************/
1754 static void pc_recv_process(void *context, void *data)
1755 {
1756         osm_perfmgr_t *pm = context;
1757         osm_madw_t *p_madw = data;
1758         osm_madw_context_t *mad_context = &p_madw->context;
1759         ib_mad_t *p_mad = osm_madw_get_mad_ptr(p_madw);
1760         uint64_t node_guid = mad_context->perfmgr_context.node_guid;
1761         uint8_t port = mad_context->perfmgr_context.port;
1762         perfmgr_db_err_reading_t err_reading;
1763         perfmgr_db_data_cnt_reading_t data_reading;
1764         cl_map_item_t *p_node;
1765         monitored_node_t *p_mon_node;
1766         ib_class_port_info_t *cpi = NULL;
1767
1768         OSM_LOG_ENTER(pm->log);
1769
1770         /*
1771          * get the monitored node struct to have the printable name
1772          * for log messages
1773          */
1774         if ((p_node = cl_qmap_get(&pm->monitored_map, node_guid)) ==
1775             cl_qmap_end(&pm->monitored_map)) {
1776                 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5412: GUID 0x%016"
1777                         PRIx64 " not found in monitored map\n", node_guid);
1778                 goto Exit;
1779         }
1780         p_mon_node = (monitored_node_t *) p_node;
1781
1782         OSM_LOG(pm->log, OSM_LOG_VERBOSE,
1783                 "Processing received MAD status 0x%x context 0x%"
1784                 PRIx64 " port %u\n", cl_ntoh16(p_mad->status), node_guid, port);
1785
1786         CL_ASSERT(p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS ||
1787                   p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS_EXT ||
1788                   p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO);
1789
1790         cl_plock_acquire(&pm->osm->lock);
1791         /* validate port number */
1792         if (port >= p_mon_node->num_ports) {
1793                 cl_plock_release(&pm->osm->lock);
1794                 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5413: "
1795                         "Invalid port num %d for GUID 0x%016"
1796                         PRIx64 " num ports %d\n", port, node_guid,
1797                         p_mon_node->num_ports);
1798                 goto Exit;
1799         }
1800         cl_plock_release(&pm->osm->lock);
1801
1802         /* capture CLASS_PORT_INFO data */
1803         if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
1804                 boolean_t cpi_valid = TRUE;
1805
1806                 cpi = (ib_class_port_info_t *) &
1807                     (osm_madw_get_perfmgt_mad_ptr(p_madw)->data);
1808
1809                 /* Response could be redirection (IBM eHCA PMA does this) */
1810                 if (p_mad->status & IB_MAD_STATUS_REDIRECT)
1811                         cpi_valid = handle_redirect(pm, cpi, p_mon_node, port,
1812                                                         mad_context);
1813
1814                 if (pm->query_cpi && cpi_valid) {
1815                         cl_plock_acquire(&pm->osm->lock);
1816                         if (p_mon_node->node_type == IB_NODE_TYPE_SWITCH) {
1817                                 int i;
1818                                 for (i = p_mon_node->esp0 ? 0 : 1;
1819                                      i < p_mon_node->num_ports;
1820                                      i++) {
1821                                         p_mon_node->port[i].cap_mask = cpi->cap_mask;
1822                                         p_mon_node->port[i].cpi_valid = cpi_valid;
1823                                 }
1824                         } else {
1825                                 p_mon_node->port[port].cap_mask = cpi->cap_mask;
1826                                 p_mon_node->port[port].cpi_valid = cpi_valid;
1827                         }
1828                         cl_plock_release(&pm->osm->lock);
1829                 }
1830                 goto Exit;
1831         }
1832
1833         if (p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS_EXT) {
1834                 ib_port_counters_ext_t *ext_wire_read =
1835                                 (ib_port_counters_ext_t *)
1836                                 &osm_madw_get_perfmgt_mad_ptr(p_madw)->data;
1837
1838                 /* convert wire data to perfmgr data counter reading */
1839                 perfmgr_db_fill_data_cnt_read_pce(ext_wire_read, &data_reading,
1840                                                   ietf_supported(p_mon_node,
1841                                                                  port));
1842
1843                 /* add counter */
1844                 if (mad_context->perfmgr_context.mad_method
1845                     == IB_MAD_METHOD_GET) {
1846                         /* detect an out of band clear on the port */
1847                         perfmgr_check_data_cnt_oob_clear(pm, p_mon_node, port,
1848                                                     &data_reading);
1849
1850                         perfmgr_db_add_dc_reading(pm->db, node_guid, port,
1851                                                   &data_reading,
1852                                                   ietf_supported(p_mon_node,
1853                                                                  port));
1854                 } else {
1855                         perfmgr_db_clear_prev_dc(pm->db, node_guid, port);
1856                 }
1857
1858                 perfmgr_check_pce_overflow(pm, p_mon_node,
1859                                            p_mon_node->port[port].pkey_ix,
1860                                            port, ext_wire_read);
1861         } else {
1862                 boolean_t pce_sup = pce_supported(p_mon_node, port);
1863                 boolean_t xmit_wait_sup = xmit_wait_supported(p_mon_node, port);
1864                 ib_port_counters_t *wire_read =
1865                                 (ib_port_counters_t *)
1866                                 &osm_madw_get_perfmgt_mad_ptr(p_madw)->data;
1867
1868                 perfmgr_db_fill_err_read(wire_read, &err_reading, xmit_wait_sup);
1869                 if (!pce_sup)
1870                         perfmgr_db_fill_data_cnt_read_pc(wire_read, &data_reading);
1871
1872                 if (mad_context->perfmgr_context.mad_method == IB_MAD_METHOD_GET) {
1873                         /* detect an out of band clear on the port */
1874                         perfmgr_check_oob_clear(pm, p_mon_node, port, &err_reading);
1875                         if (!pce_sup)
1876                                 perfmgr_check_data_cnt_oob_clear(pm, p_mon_node, port,
1877                                                             &data_reading);
1878
1879                         /* log errors from this reading */
1880                         if (pm->subn->opt.perfmgr_log_errors)
1881                                 perfmgr_log_errors(pm, p_mon_node, port, &err_reading);
1882
1883                         perfmgr_db_add_err_reading(pm->db, node_guid, port,
1884                                                    &err_reading);
1885                         if (!pce_sup)
1886                                 perfmgr_db_add_dc_reading(pm->db, node_guid, port,
1887                                                           &data_reading, 0);
1888                 } else {
1889                         perfmgr_db_clear_prev_err(pm->db, node_guid, port);
1890                         if (!pce_sup)
1891                                 perfmgr_db_clear_prev_dc(pm->db, node_guid, port);
1892                 }
1893
1894                 perfmgr_check_overflow(pm, p_mon_node, p_mon_node->port[port].pkey_ix,
1895                                        port, wire_read, xmit_wait_sup);
1896
1897         }
1898
1899 #ifdef ENABLE_OSM_PERF_MGR_PROFILE
1900         do {
1901                 struct timeval proc_time;
1902                 gettimeofday(&proc_time, NULL);
1903                 diff_time(&p_madw->context.perfmgr_context.query_start,
1904                           &proc_time, &proc_time);
1905                 update_mad_stats(&proc_time);
1906         } while (0);
1907 #endif
1908
1909 Exit:
1910         osm_mad_pool_put(pm->mad_pool, p_madw);
1911
1912         OSM_LOG_EXIT(pm->log);
1913 }
1914
1915 /**********************************************************************
1916  * Initialize the PerfMgr object
1917  **********************************************************************/
1918 ib_api_status_t osm_perfmgr_init(osm_perfmgr_t * pm, osm_opensm_t * osm,
1919                                  const osm_subn_opt_t * p_opt)
1920 {
1921         ib_api_status_t status;
1922
1923         OSM_LOG_ENTER(&osm->log);
1924
1925         OSM_LOG(&osm->log, OSM_LOG_VERBOSE, "Initializing PerfMgr\n");
1926
1927         memset(pm, 0, sizeof(*pm));
1928
1929         pm->subn = &osm->subn;
1930         pm->sm = &osm->sm;
1931         pm->log = &osm->log;
1932         pm->mad_pool = &osm->mad_pool;
1933         pm->vendor = osm->p_vendor;
1934         pm->trans_id = PERFMGR_INITIAL_TID_VALUE;
1935         pm->state =
1936             p_opt->perfmgr ? PERFMGR_STATE_ENABLED : PERFMGR_STATE_DISABLE;
1937         pm->sweep_state = PERFMGR_SWEEP_SLEEP;
1938         cl_spinlock_init(&pm->lock);
1939         pm->sweep_time_s = p_opt->perfmgr_sweep_time_s;
1940         pm->max_outstanding_queries = p_opt->perfmgr_max_outstanding_queries;
1941         pm->ignore_cas = p_opt->perfmgr_ignore_cas;
1942         pm->osm = osm;
1943         pm->local_port = -1;
1944
1945         status = cl_timer_init(&pm->sweep_timer, perfmgr_sweep, pm);
1946         if (status != IB_SUCCESS)
1947                 goto Exit;
1948
1949         status = IB_INSUFFICIENT_RESOURCES;
1950         pm->db = perfmgr_db_construct(pm);
1951         if (!pm->db) {
1952                 pm->state = PERFMGR_STATE_NO_DB;
1953                 goto Exit;
1954         }
1955
1956         pm->pc_disp_h = cl_disp_register(&osm->disp, OSM_MSG_MAD_PORT_COUNTERS,
1957                                          pc_recv_process, pm);
1958         if (pm->pc_disp_h == CL_DISP_INVALID_HANDLE) {
1959                 perfmgr_db_destroy(pm->db);
1960                 goto Exit;
1961         }
1962
1963         init_monitored_nodes(pm);
1964
1965         if (pm->state == PERFMGR_STATE_ENABLED)
1966                 cl_timer_start(&pm->sweep_timer, pm->sweep_time_s * 1000);
1967
1968         pm->rm_nodes = p_opt->perfmgr_rm_nodes;
1969         pm->query_cpi = p_opt->perfmgr_query_cpi;
1970         pm->xmit_wait_log = p_opt->perfmgr_xmit_wait_log;
1971         pm->xmit_wait_threshold = p_opt->perfmgr_xmit_wait_threshold;
1972         status = IB_SUCCESS;
1973 Exit:
1974         OSM_LOG_EXIT(pm->log);
1975         return status;
1976 }
1977
1978 /**********************************************************************
1979  * Clear the counters from the db
1980  **********************************************************************/
1981 void osm_perfmgr_clear_counters(osm_perfmgr_t * pm)
1982 {
1983         /**
1984          * FIXME todo issue clear on the fabric?
1985          */
1986         perfmgr_db_clear_counters(pm->db);
1987         osm_log_v2(pm->log, OSM_LOG_INFO, FILE_ID, "PerfMgr counters cleared\n");
1988 }
1989
1990 /*******************************************************************
1991  * Dump the DB information to the file specified
1992  *******************************************************************/
1993 void osm_perfmgr_dump_counters(osm_perfmgr_t * pm, perfmgr_db_dump_t dump_type)
1994 {
1995         char path[256];
1996         char *file_name;
1997         if (pm->subn->opt.event_db_dump_file)
1998                 file_name = pm->subn->opt.event_db_dump_file;
1999         else {
2000                 snprintf(path, sizeof(path), "%s/%s",
2001                          pm->subn->opt.dump_files_dir,
2002                          OSM_PERFMGR_DEFAULT_DUMP_FILE);
2003                 file_name = path;
2004         }
2005         if (perfmgr_db_dump(pm->db, file_name, dump_type) != 0)
2006                 OSM_LOG(pm->log, OSM_LOG_ERROR, "Failed to dump file %s : %s",
2007                         file_name, strerror(errno));
2008 }
2009
2010 /*******************************************************************
2011  * Print the DB information to the fp specified
2012  *******************************************************************/
2013 void osm_perfmgr_print_counters(osm_perfmgr_t * pm, char *nodename, FILE * fp,
2014                                 char *port, int err_only)
2015 {
2016         if (nodename) {
2017                 char *end = NULL;
2018                 uint64_t guid = strtoull(nodename, &end, 0);
2019                 if (nodename + strlen(nodename) != end)
2020                         perfmgr_db_print_by_name(pm->db, nodename, fp, port,
2021                                                  err_only);
2022                 else
2023                         perfmgr_db_print_by_guid(pm->db, guid, fp, port,
2024                                                  err_only);
2025         } else
2026                 perfmgr_db_print_all(pm->db, fp, err_only);
2027 }
2028
2029 void osm_perfmgr_update_nodename(osm_perfmgr_t *pm, uint64_t node_guid,
2030                                 char *nodename)
2031 {
2032         if (pm->db)
2033                 perfmgr_db_update_name(pm->db, node_guid, nodename);
2034 }
2035 #endif                          /* ENABLE_OSM_PERF_MGR */