]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/ofed/opensm/opensm/osm_congestion_control.c
Upgrade to Bzip2 version 1.0.7.
[FreeBSD/FreeBSD.git] / contrib / ofed / opensm / opensm / osm_congestion_control.c
1 /*
2  * Copyright (c) 2006-2009 Voltaire, Inc. All rights reserved.
3  * Copyright (c) 2009 HNR Consulting. All rights reserved.
4  * Copyright (c) 2012 Lawrence Livermore National Lab.  All rights reserved.
5  * Copyright (c) 2014 Mellanox Technologies LTD. All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  *
35  */
36
37 /*
38  * Abstract:
39  *    OSM Congestion Control configuration implementation
40  *
41  * Author:
42  *    Albert Chu, LLNL
43  */
44
45 #if HAVE_CONFIG_H
46 #  include <config.h>
47 #endif                          /* HAVE_CONFIG_H */
48
49 #include <stdlib.h>
50 #include <string.h>
51
52 #include <iba/ib_types.h>
53 #include <complib/cl_debug.h>
54 #include <opensm/osm_file_ids.h>
55 #define FILE_ID OSM_FILE_CONGESTION_CONTROL_C
56 #include <opensm/osm_subnet.h>
57 #include <opensm/osm_opensm.h>
58 #include <opensm/osm_log.h>
59 #include <opensm/osm_subnet.h>
60 #include <opensm/osm_congestion_control.h>
61
62 #define CONGESTION_CONTROL_INITIAL_TID_VALUE 0x7A93
63
64 static void cc_mad_post(osm_congestion_control_t *p_cc,
65                         osm_madw_t *p_madw,
66                         osm_node_t *p_node,
67                         osm_physp_t *p_physp,
68                         ib_net16_t attr_id,
69                         ib_net32_t attr_mod)
70 {
71         osm_subn_opt_t *p_opt = &p_cc->subn->opt;
72         ib_cc_mad_t *p_cc_mad;
73         uint8_t port;
74
75         OSM_LOG_ENTER(p_cc->log);
76
77         port = osm_physp_get_port_num(p_physp);
78
79         p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw);
80
81         p_cc_mad->header.base_ver = 1;
82         p_cc_mad->header.mgmt_class = IB_MCLASS_CC;
83         p_cc_mad->header.class_ver = 2;
84         p_cc_mad->header.method = IB_MAD_METHOD_SET;
85         p_cc_mad->header.status = 0;
86         p_cc_mad->header.class_spec = 0;
87         p_cc_mad->header.trans_id =
88                 cl_hton64((uint64_t) cl_atomic_inc(&p_cc->trans_id) &
89                           (uint64_t) (0xFFFFFFFF));
90         if (p_cc_mad->header.trans_id == 0)
91                 p_cc_mad->header.trans_id =
92                         cl_hton64((uint64_t) cl_atomic_inc(&p_cc->trans_id) &
93                                   (uint64_t) (0xFFFFFFFF));
94         p_cc_mad->header.attr_id = attr_id;
95         p_cc_mad->header.resv = 0;
96         p_cc_mad->header.attr_mod = attr_mod;
97
98         p_cc_mad->cc_key = p_opt->cc_key;
99
100         memset(p_cc_mad->log_data, '\0', IB_CC_LOG_DATA_SIZE);
101
102         p_madw->mad_addr.dest_lid = osm_node_get_base_lid(p_node, port);
103         p_madw->mad_addr.addr_type.gsi.remote_qp = IB_QP1;
104         p_madw->mad_addr.addr_type.gsi.remote_qkey =
105                 cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY);
106         p_madw->resp_expected = TRUE;
107         p_madw->fail_msg = CL_DISP_MSGID_NONE;
108
109         p_madw->context.cc_context.node_guid = osm_node_get_node_guid(p_node);
110         p_madw->context.cc_context.port_guid = osm_physp_get_port_guid(p_physp);
111         p_madw->context.cc_context.port = port;
112         p_madw->context.cc_context.mad_method = IB_MAD_METHOD_SET;
113         p_madw->context.cc_context.attr_mod = attr_mod;
114
115         cl_spinlock_acquire(&p_cc->mad_queue_lock);
116         cl_atomic_inc(&p_cc->outstanding_mads);
117         cl_qlist_insert_tail(&p_cc->mad_queue, &p_madw->list_item);
118         cl_spinlock_release(&p_cc->mad_queue_lock);
119
120         cl_event_signal(&p_cc->cc_poller_wakeup);
121
122         OSM_LOG_EXIT(p_cc->log);
123 }
124
125 static void cc_setup_mad_data(osm_sm_t * p_sm)
126 {
127         osm_congestion_control_t *p_cc = &p_sm->p_subn->p_osm->cc;
128         osm_subn_opt_t *p_opt = &p_sm->p_subn->opt;
129         uint16_t ccti_limit;
130         int i;
131
132         /* Switch Congestion Setting */
133         p_cc->sw_cong_setting.control_map = p_opt->cc_sw_cong_setting_control_map;
134
135         memcpy(p_cc->sw_cong_setting.victim_mask,
136                p_opt->cc_sw_cong_setting_victim_mask,
137                IB_CC_PORT_MASK_DATA_SIZE);
138
139         memcpy(p_cc->sw_cong_setting.credit_mask,
140                p_opt->cc_sw_cong_setting_credit_mask,
141                IB_CC_PORT_MASK_DATA_SIZE);
142
143         /* threshold is 4 bits, takes up upper nibble of byte */
144         p_cc->sw_cong_setting.threshold_resv = (p_opt->cc_sw_cong_setting_threshold << 4);
145
146         p_cc->sw_cong_setting.packet_size = p_opt->cc_sw_cong_setting_packet_size;
147
148         /* cs threshold is 4 bits, takes up upper nibble of short */
149         p_cc->sw_cong_setting.cs_threshold_resv =
150                 cl_hton16(p_opt->cc_sw_cong_setting_credit_starvation_threshold << 12);
151
152         p_cc->sw_cong_setting.cs_return_delay =
153                 cl_hton16(p_opt->cc_sw_cong_setting_credit_starvation_return_delay.shift << 14
154                           | p_opt->cc_sw_cong_setting_credit_starvation_return_delay.multiplier);
155
156         p_cc->sw_cong_setting.marking_rate = p_opt->cc_sw_cong_setting_marking_rate;
157
158         /* CA Congestion Setting */
159         p_cc->ca_cong_setting.port_control = p_opt->cc_ca_cong_setting_port_control;
160         p_cc->ca_cong_setting.control_map = p_opt->cc_ca_cong_setting_control_map;
161
162         for (i = 0; i < IB_CA_CONG_ENTRY_DATA_SIZE; i++) {
163                 ib_ca_cong_entry_t *p_entry;
164
165                 p_entry = &p_cc->ca_cong_setting.entry_list[i];
166
167                 p_entry->ccti_timer = p_opt->cc_ca_cong_entries[i].ccti_timer;
168                 p_entry->ccti_increase = p_opt->cc_ca_cong_entries[i].ccti_increase;
169                 p_entry->trigger_threshold = p_opt->cc_ca_cong_entries[i].trigger_threshold;
170                 p_entry->ccti_min = p_opt->cc_ca_cong_entries[i].ccti_min;
171                 p_entry->resv0 = 0;
172                 p_entry->resv1 = 0;
173         }
174
175         /* Congestion Control Table */
176
177         /* if no entries, we will always send at least 1 mad to set ccti_limit = 0 */
178         if (!p_opt->cc_cct.entries_len)
179                 p_cc->cc_tbl_mads = 1;
180         else {
181                 p_cc->cc_tbl_mads = p_opt->cc_cct.entries_len - 1;
182                 p_cc->cc_tbl_mads /= IB_CC_TBL_ENTRY_LIST_MAX;
183                 p_cc->cc_tbl_mads += 1;
184         }
185
186         CL_ASSERT(p_cc->cc_tbl_mads <= OSM_CCT_ENTRY_MAD_BLOCKS);
187
188         if (!p_opt->cc_cct.entries_len)
189                 ccti_limit = 0;
190         else
191                 ccti_limit = p_opt->cc_cct.entries_len - 1;
192
193         for (i = 0; i < p_cc->cc_tbl_mads; i++) {
194                 int j;
195
196                 p_cc->cc_tbl[i].ccti_limit = cl_hton16(ccti_limit);
197                 p_cc->cc_tbl[i].resv = 0;
198
199                 memset(p_cc->cc_tbl[i].entry_list,
200                        '\0',
201                        sizeof(p_cc->cc_tbl[i].entry_list));
202
203                 if (!ccti_limit)
204                         break;
205
206                 for (j = 0; j < IB_CC_TBL_ENTRY_LIST_MAX; j++) {
207                         int k;
208
209                         k = (i * IB_CC_TBL_ENTRY_LIST_MAX) + j;
210                         p_cc->cc_tbl[i].entry_list[j].shift_multiplier =
211                                 cl_hton16(p_opt->cc_cct.entries[k].shift << 14
212                                           | p_opt->cc_cct.entries[k].multiplier);
213                 }
214         }
215 }
216
217 static ib_api_status_t cc_send_sw_cong_setting(osm_sm_t * p_sm,
218                                                osm_node_t *p_node)
219 {
220         osm_congestion_control_t *p_cc = &p_sm->p_subn->p_osm->cc;
221         unsigned force_update;
222         osm_physp_t *p_physp;
223         osm_madw_t *p_madw = NULL;
224         ib_cc_mad_t *p_cc_mad = NULL;
225         ib_sw_cong_setting_t *p_sw_cong_setting = NULL;
226
227         OSM_LOG_ENTER(p_sm->p_log);
228
229         p_physp = osm_node_get_physp_ptr(p_node, 0);
230
231         force_update = p_physp->need_update || p_sm->p_subn->need_update;
232
233         if (!force_update
234             && !memcmp(&p_cc->sw_cong_setting,
235                        &p_physp->cc.sw.sw_cong_setting,
236                        sizeof(p_cc->sw_cong_setting)))
237                 return IB_SUCCESS;
238
239         p_madw = osm_mad_pool_get(p_cc->mad_pool, p_cc->bind_handle,
240                                   MAD_BLOCK_SIZE, NULL);
241         if (p_madw == NULL) {
242                 OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR C101: "
243                         "failed to allocate mad\n");
244                 return IB_INSUFFICIENT_MEMORY;
245         }
246
247         p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw);
248
249         p_sw_cong_setting = ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
250
251         memcpy(p_sw_cong_setting,
252                &p_cc->sw_cong_setting,
253                sizeof(p_cc->sw_cong_setting));
254
255         cc_mad_post(p_cc, p_madw, p_node, p_physp,
256                     IB_MAD_ATTR_SW_CONG_SETTING, 0);
257
258         OSM_LOG_EXIT(p_sm->p_log);
259
260         return IB_SUCCESS;
261 }
262
263 static ib_api_status_t cc_send_ca_cong_setting(osm_sm_t * p_sm,
264                                                osm_node_t *p_node,
265                                                osm_physp_t *p_physp)
266 {
267         osm_congestion_control_t *p_cc = &p_sm->p_subn->p_osm->cc;
268         unsigned force_update;
269         osm_madw_t *p_madw = NULL;
270         ib_cc_mad_t *p_cc_mad = NULL;
271         ib_ca_cong_setting_t *p_ca_cong_setting = NULL;
272
273         OSM_LOG_ENTER(p_sm->p_log);
274
275         force_update = p_physp->need_update || p_sm->p_subn->need_update;
276
277         if (!force_update
278             && !memcmp(&p_cc->ca_cong_setting,
279                        &p_physp->cc.ca.ca_cong_setting,
280                        sizeof(p_cc->ca_cong_setting)))
281                 return IB_SUCCESS;
282
283         p_madw = osm_mad_pool_get(p_cc->mad_pool, p_cc->bind_handle,
284                                   MAD_BLOCK_SIZE, NULL);
285         if (p_madw == NULL) {
286                 OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR C102: "
287                         "failed to allocate mad\n");
288                 return IB_INSUFFICIENT_MEMORY;
289         }
290
291         p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw);
292
293         p_ca_cong_setting = ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
294
295         memcpy(p_ca_cong_setting,
296                &p_cc->ca_cong_setting,
297                sizeof(p_cc->ca_cong_setting));
298
299         cc_mad_post(p_cc, p_madw, p_node, p_physp,
300                     IB_MAD_ATTR_CA_CONG_SETTING, 0);
301
302         OSM_LOG_EXIT(p_sm->p_log);
303
304         return IB_SUCCESS;
305 }
306
307 static ib_api_status_t cc_send_cct(osm_sm_t * p_sm,
308                                    osm_node_t *p_node,
309                                    osm_physp_t *p_physp)
310 {
311         osm_congestion_control_t *p_cc = &p_sm->p_subn->p_osm->cc;
312         unsigned force_update;
313         osm_madw_t *p_madw = NULL;
314         ib_cc_mad_t *p_cc_mad = NULL;
315         ib_cc_tbl_t *p_cc_tbl = NULL;
316         unsigned int index = 0;
317
318         OSM_LOG_ENTER(p_sm->p_log);
319
320         force_update = p_physp->need_update || p_sm->p_subn->need_update;
321
322         for (index = 0; index < p_cc->cc_tbl_mads; index++) {
323                 if (!force_update
324                     && !memcmp(&p_cc->cc_tbl[index],
325                                &p_physp->cc.ca.cc_tbl[index],
326                                sizeof(p_cc->cc_tbl[index])))
327                         continue;
328
329                 p_madw = osm_mad_pool_get(p_cc->mad_pool, p_cc->bind_handle,
330                                           MAD_BLOCK_SIZE, NULL);
331                 if (p_madw == NULL) {
332                         OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR C103: "
333                                 "failed to allocate mad\n");
334                         return IB_INSUFFICIENT_MEMORY;
335                 }
336
337                 p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw);
338
339                 p_cc_tbl = (ib_cc_tbl_t *)ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
340
341                 memcpy(p_cc_tbl,
342                        &p_cc->cc_tbl[index],
343                        sizeof(p_cc->cc_tbl[index]));
344
345                 cc_mad_post(p_cc, p_madw, p_node, p_physp,
346                             IB_MAD_ATTR_CC_TBL, cl_hton32(index));
347         }
348
349         OSM_LOG_EXIT(p_sm->p_log);
350
351         return IB_SUCCESS;
352 }
353
354 int osm_congestion_control_setup(struct osm_opensm *p_osm)
355 {
356         cl_qmap_t *p_tbl;
357         cl_map_item_t *p_next;
358         int ret = 0;
359
360         if (!p_osm->subn.opt.congestion_control)
361                 return 0;
362
363         OSM_LOG_ENTER(&p_osm->log);
364
365         /*
366          * Do nothing unless the most recent routing attempt was successful.
367          */
368         if (!p_osm->routing_engine_used)
369                 return 0;
370
371         cc_setup_mad_data(&p_osm->sm);
372
373         cl_plock_acquire(&p_osm->lock);
374
375         p_tbl = &p_osm->subn.port_guid_tbl;
376         p_next = cl_qmap_head(p_tbl);
377         while (p_next != cl_qmap_end(p_tbl)) {
378                 osm_port_t *p_port = (osm_port_t *) p_next;
379                 osm_node_t *p_node = p_port->p_node;
380                 ib_api_status_t status;
381
382                 p_next = cl_qmap_next(p_next);
383
384                 if (p_port->cc_unavailable_flag)
385                         continue;
386
387                 if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) {
388                         status = cc_send_sw_cong_setting(&p_osm->sm, p_node);
389                         if (status != IB_SUCCESS)
390                                 ret = -1;
391                 } else if (osm_node_get_type(p_node) == IB_NODE_TYPE_CA) {
392                         status = cc_send_ca_cong_setting(&p_osm->sm,
393                                                          p_node,
394                                                          p_port->p_physp);
395                         if (status != IB_SUCCESS)
396                                 ret = -1;
397
398                         status = cc_send_cct(&p_osm->sm,
399                                              p_node,
400                                              p_port->p_physp);
401                         if (status != IB_SUCCESS)
402                                 ret = -1;
403                 }
404         }
405
406         cl_plock_release(&p_osm->lock);
407
408         OSM_LOG_EXIT(&p_osm->log);
409
410         return ret;
411 }
412
413 int osm_congestion_control_wait_pending_transactions(struct osm_opensm *p_osm)
414 {
415         osm_congestion_control_t *cc = &p_osm->cc;
416
417         if (!p_osm->subn.opt.congestion_control)
418                 return 0;
419
420         while (1) {
421                 unsigned count = cc->outstanding_mads;
422                 if (!count || osm_exit_flag)
423                         break;
424                 cl_event_wait_on(&cc->outstanding_mads_done_event,
425                                  EVENT_NO_TIMEOUT,
426                                  TRUE);
427         }
428
429         return osm_exit_flag;
430 }
431
432 static inline void decrement_outstanding_mads(osm_congestion_control_t *p_cc)
433 {
434         uint32_t outstanding;
435
436         outstanding = cl_atomic_dec(&p_cc->outstanding_mads);
437         if (!outstanding)
438                 cl_event_signal(&p_cc->outstanding_mads_done_event);
439
440         cl_atomic_dec(&p_cc->outstanding_mads_on_wire);
441         cl_event_signal(&p_cc->sig_mads_on_wire_continue);
442 }
443
444 static void cc_rcv_mad(void *context, void *data)
445 {
446         osm_congestion_control_t *p_cc = context;
447         osm_opensm_t *p_osm = p_cc->osm;
448         osm_madw_t *p_madw = data;
449         ib_cc_mad_t *p_cc_mad;
450         osm_madw_context_t *p_mad_context = &p_madw->context;
451         ib_mad_t *p_mad = osm_madw_get_mad_ptr(p_madw);
452         ib_net64_t node_guid = p_mad_context->cc_context.node_guid;
453         ib_net64_t port_guid = p_mad_context->cc_context.port_guid;
454         uint8_t port = p_mad_context->cc_context.port;
455         osm_port_t *p_port;
456
457         OSM_LOG_ENTER(p_cc->log);
458
459         OSM_LOG(p_cc->log, OSM_LOG_VERBOSE,
460                 "Processing received MAD status 0x%x for "
461                 "attr ID %u mod 0x%x node 0x%" PRIx64 " port %u\n",
462                 cl_ntoh16(p_mad->status), cl_ntoh16(p_mad->attr_id),
463                 cl_ntoh32(p_mad_context->cc_context.attr_mod),
464                 cl_ntoh64(node_guid), port);
465
466         p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw);
467
468         cl_plock_acquire(&p_osm->lock);
469
470         p_port = osm_get_port_by_guid(p_cc->subn, port_guid);
471         if (!p_port) {
472                 OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C109: "
473                         "Port GUID 0x%" PRIx64 " not in table\n",
474                         cl_ntoh64(port_guid));
475                 cl_plock_release(&p_osm->lock);
476                 goto Exit;
477         }
478
479         p_port->cc_timeout_count = 0;
480
481         if (p_cc_mad->header.status) {
482                 if (p_cc_mad->header.status & IB_MAD_STATUS_UNSUP_CLASS_VER
483                     || p_cc_mad->header.status & IB_MAD_STATUS_UNSUP_METHOD
484                     || p_cc_mad->header.status & IB_MAD_STATUS_UNSUP_METHOD_ATTR)
485                         p_port->cc_unavailable_flag = TRUE;
486                 cl_plock_release(&p_osm->lock);
487                 goto Exit;
488         }
489         else
490                 p_port->cc_unavailable_flag = FALSE;
491
492         if (p_cc_mad->header.attr_id == IB_MAD_ATTR_SW_CONG_SETTING) {
493                 ib_sw_cong_setting_t *p_sw_cong_setting;
494
495                 p_sw_cong_setting = ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
496                 p_port->p_physp->cc.sw.sw_cong_setting = *p_sw_cong_setting;
497         }
498         else if (p_cc_mad->header.attr_id == IB_MAD_ATTR_CA_CONG_SETTING) {
499                 ib_ca_cong_setting_t *p_ca_cong_setting;
500
501                 p_ca_cong_setting = ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
502                 p_port->p_physp->cc.ca.ca_cong_setting = *p_ca_cong_setting;
503         }
504         else if (p_cc_mad->header.attr_id == IB_MAD_ATTR_CC_TBL) {
505                 ib_net32_t attr_mod = p_mad_context->cc_context.attr_mod;
506                 uint32_t index = cl_ntoh32(attr_mod);
507                 ib_cc_tbl_t *p_cc_tbl;
508
509                 p_cc_tbl = ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
510                 p_port->p_physp->cc.ca.cc_tbl[index] = *p_cc_tbl;
511         }
512         else
513                 OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C10A: "
514                         "Unexpected MAD attribute ID %u received\n",
515                         cl_ntoh16(p_cc_mad->header.attr_id));
516
517         cl_plock_release(&p_osm->lock);
518
519 Exit:
520         decrement_outstanding_mads(p_cc);
521         osm_mad_pool_put(p_cc->mad_pool, p_madw);
522         OSM_LOG_EXIT(p_cc->log);
523 }
524
525 static void cc_poller_send(osm_congestion_control_t *p_cc,
526                            osm_madw_t *p_madw)
527 {
528         osm_subn_opt_t *p_opt = &p_cc->subn->opt;
529         ib_api_status_t status;
530         cl_status_t sts;
531         osm_madw_context_t mad_context = p_madw->context;
532
533         status = osm_vendor_send(p_cc->bind_handle, p_madw, TRUE);
534         if (status == IB_SUCCESS) {
535                 cl_atomic_inc(&p_cc->outstanding_mads_on_wire);
536                 while (p_cc->outstanding_mads_on_wire >
537                        (int32_t)p_opt->cc_max_outstanding_mads) {
538 wait:
539                         sts = cl_event_wait_on(&p_cc->sig_mads_on_wire_continue,
540                                                EVENT_NO_TIMEOUT, TRUE);
541                         if (sts != CL_SUCCESS)
542                                 goto wait;
543                 }
544         } else
545                 OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C104: "
546                         "send failed to node 0x%" PRIx64 "port %u\n",
547                         cl_ntoh64(mad_context.cc_context.node_guid),
548                         mad_context.cc_context.port);
549 }
550
551 static void cc_poller(void *p_ptr)
552 {
553         osm_congestion_control_t *p_cc = p_ptr;
554         osm_madw_t *p_madw;
555
556         OSM_LOG_ENTER(p_cc->log);
557
558         if (p_cc->thread_state == OSM_THREAD_STATE_NONE)
559                 p_cc->thread_state = OSM_THREAD_STATE_RUN;
560
561         while (p_cc->thread_state == OSM_THREAD_STATE_RUN) {
562                 cl_spinlock_acquire(&p_cc->mad_queue_lock);
563
564                 p_madw = (osm_madw_t *) cl_qlist_remove_head(&p_cc->mad_queue);
565
566                 cl_spinlock_release(&p_cc->mad_queue_lock);
567
568                 if (p_madw != (osm_madw_t *) cl_qlist_end(&p_cc->mad_queue))
569                         cc_poller_send(p_cc, p_madw);
570                 else
571                         cl_event_wait_on(&p_cc->cc_poller_wakeup,
572                                          EVENT_NO_TIMEOUT, TRUE);
573         }
574
575         OSM_LOG_EXIT(p_cc->log);
576 }
577
578 ib_api_status_t osm_congestion_control_init(osm_congestion_control_t * p_cc,
579                                             struct osm_opensm *p_osm,
580                                             const osm_subn_opt_t * p_opt)
581 {
582         ib_api_status_t status = IB_SUCCESS;
583
584         OSM_LOG_ENTER(&p_osm->log);
585
586         memset(p_cc, 0, sizeof(*p_cc));
587
588         p_cc->osm = p_osm;
589         p_cc->subn = &p_osm->subn;
590         p_cc->sm = &p_osm->sm;
591         p_cc->log = &p_osm->log;
592         p_cc->mad_pool = &p_osm->mad_pool;
593         p_cc->trans_id = CONGESTION_CONTROL_INITIAL_TID_VALUE;
594         p_cc->vendor = p_osm->p_vendor;
595
596         p_cc->cc_disp_h = cl_disp_register(&p_osm->disp, OSM_MSG_MAD_CC,
597                                            cc_rcv_mad, p_cc);
598         if (p_cc->cc_disp_h == CL_DISP_INVALID_HANDLE)
599                 goto Exit;
600
601         cl_qlist_init(&p_cc->mad_queue);
602
603         status = cl_spinlock_init(&p_cc->mad_queue_lock);
604         if (status != IB_SUCCESS)
605                 goto Exit;
606
607         cl_event_construct(&p_cc->cc_poller_wakeup);
608         status = cl_event_init(&p_cc->cc_poller_wakeup, FALSE);
609         if (status != IB_SUCCESS)
610                 goto Exit;
611
612         cl_event_construct(&p_cc->outstanding_mads_done_event);
613         status = cl_event_init(&p_cc->outstanding_mads_done_event, FALSE);
614         if (status != IB_SUCCESS)
615                 goto Exit;
616
617         cl_event_construct(&p_cc->sig_mads_on_wire_continue);
618         status = cl_event_init(&p_cc->sig_mads_on_wire_continue, FALSE);
619         if (status != IB_SUCCESS)
620                 goto Exit;
621
622         p_cc->thread_state = OSM_THREAD_STATE_NONE;
623
624         status = cl_thread_init(&p_cc->cc_poller, cc_poller, p_cc,
625                                 "cc poller");
626         if (status != IB_SUCCESS)
627                 goto Exit;
628
629         status = IB_SUCCESS;
630 Exit:
631         OSM_LOG_EXIT(p_cc->log);
632         return status;
633 }
634
635 static void cc_mad_recv_callback(osm_madw_t * p_madw, void *bind_context,
636                                  osm_madw_t * p_req_madw)
637 {
638         osm_congestion_control_t *p_cc = bind_context;
639
640         OSM_LOG_ENTER(p_cc->log);
641
642         CL_ASSERT(p_madw);
643
644         /* HACK - should be extended when supporting CC traps */
645         CL_ASSERT(p_req_madw != NULL);
646
647         osm_madw_copy_context(p_madw, p_req_madw);
648         osm_mad_pool_put(p_cc->mad_pool, p_req_madw);
649
650         /* Do not decrement outstanding mads here, do it in the dispatcher */
651
652         if (cl_disp_post(p_cc->cc_disp_h, OSM_MSG_MAD_CC,
653                          p_madw, NULL, NULL) != CL_SUCCESS) {
654                 OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C105: "
655                         "Congestion Control Dispatcher post failed\n");
656                 osm_mad_pool_put(p_cc->mad_pool, p_madw);
657         }
658
659         OSM_LOG_EXIT(p_cc->log);
660 }
661
662 static void cc_mad_send_err_callback(void *bind_context,
663                                      osm_madw_t * p_madw)
664 {
665         osm_congestion_control_t *p_cc = bind_context;
666         osm_madw_context_t *p_madw_context = &p_madw->context;
667         osm_opensm_t *p_osm = p_cc->osm;
668         uint64_t node_guid = p_madw_context->cc_context.node_guid;
669         uint64_t port_guid = p_madw_context->cc_context.port_guid;
670         uint8_t port = p_madw_context->cc_context.port;
671         osm_port_t *p_port;
672         int log_flag = 1;
673
674         OSM_LOG_ENTER(p_cc->log);
675
676         cl_plock_acquire(&p_osm->lock);
677
678         p_port = osm_get_port_by_guid(p_cc->subn, port_guid);
679         if (!p_port) {
680                 OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C10B: "
681                         "Port GUID 0x%" PRIx64 " not in table\n",
682                         cl_ntoh64(port_guid));
683                 cl_plock_release(&p_osm->lock);
684                 goto Exit;
685         }
686
687         /* If timed out before, don't bothering logging again
688          * we assume no CC support
689          */
690         if (p_madw->status == IB_TIMEOUT
691             && p_port->cc_timeout_count)
692                 log_flag = 0;
693
694         if (log_flag)
695                 OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C106: MAD Error (%s): "
696                         "attr id = %u LID %u GUID 0x%016" PRIx64 " port %u "
697                         "TID 0x%" PRIx64 "\n",
698                         ib_get_err_str(p_madw->status),
699                         p_madw->p_mad->attr_id,
700                         cl_ntoh16(p_madw->mad_addr.dest_lid),
701                         cl_ntoh64(node_guid),
702                         port,
703                         cl_ntoh64(p_madw->p_mad->trans_id));
704
705         if (p_madw->status == IB_TIMEOUT) {
706                 p_port->cc_timeout_count++;
707                 if (p_port->cc_timeout_count > OSM_CC_TIMEOUT_COUNT_THRESHOLD
708                     && !p_port->cc_unavailable_flag) {
709                         p_port->cc_unavailable_flag = TRUE;
710                         p_port->cc_timeout_count = 0;
711                 }
712         } else
713                 p_cc->subn->subnet_initialization_error = TRUE;
714
715         cl_plock_release(&p_osm->lock);
716
717 Exit:
718         osm_mad_pool_put(p_cc->mad_pool, p_madw);
719
720         decrement_outstanding_mads(p_cc);
721
722         OSM_LOG_EXIT(p_cc->log);
723 }
724
725 ib_api_status_t osm_congestion_control_bind(osm_congestion_control_t * p_cc,
726                                             ib_net64_t port_guid)
727 {
728         osm_bind_info_t bind_info;
729         ib_api_status_t status = IB_SUCCESS;
730
731         OSM_LOG_ENTER(p_cc->log);
732
733         bind_info.port_guid = p_cc->port_guid = port_guid;
734         bind_info.mad_class = IB_MCLASS_CC;
735         bind_info.class_version = 2;
736         bind_info.is_responder = FALSE;
737         bind_info.is_report_processor = FALSE;
738         bind_info.is_trap_processor = FALSE;
739         bind_info.recv_q_size = OSM_SM_DEFAULT_QP1_RCV_SIZE;
740         bind_info.send_q_size = OSM_SM_DEFAULT_QP1_SEND_SIZE;
741         bind_info.timeout = p_cc->subn->opt.transaction_timeout;
742         bind_info.retries = p_cc->subn->opt.transaction_retries;
743
744         OSM_LOG(p_cc->log, OSM_LOG_VERBOSE,
745                 "Binding to port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid));
746
747         p_cc->bind_handle = osm_vendor_bind(p_cc->vendor, &bind_info,
748                                             p_cc->mad_pool,
749                                             cc_mad_recv_callback,
750                                             cc_mad_send_err_callback, p_cc);
751
752         if (p_cc->bind_handle == OSM_BIND_INVALID_HANDLE) {
753                 status = IB_ERROR;
754                 OSM_LOG(p_cc->log, OSM_LOG_ERROR,
755                         "ERR C107: Vendor specific bind failed (%s)\n",
756                         ib_get_err_str(status));
757                 goto Exit;
758         }
759
760 Exit:
761         OSM_LOG_EXIT(p_cc->log);
762         return status;
763 }
764
765 void osm_congestion_control_shutdown(osm_congestion_control_t * p_cc)
766 {
767         OSM_LOG_ENTER(p_cc->log);
768         if (p_cc->bind_handle == OSM_BIND_INVALID_HANDLE) {
769                 OSM_LOG(p_cc->log, OSM_LOG_ERROR,
770                         "ERR C108: No previous bind\n");
771                 goto Exit;
772         }
773         cl_disp_unregister(p_cc->cc_disp_h);
774 Exit:
775         OSM_LOG_EXIT(p_cc->log);
776 }
777
778 void osm_congestion_control_destroy(osm_congestion_control_t * p_cc)
779 {
780         osm_madw_t *p_madw;
781
782         OSM_LOG_ENTER(p_cc->log);
783
784         p_cc->thread_state = OSM_THREAD_STATE_EXIT;
785
786         cl_event_signal(&p_cc->sig_mads_on_wire_continue);
787         cl_event_signal(&p_cc->cc_poller_wakeup);
788
789         cl_thread_destroy(&p_cc->cc_poller);
790
791         cl_spinlock_acquire(&p_cc->mad_queue_lock);
792
793         while (!cl_is_qlist_empty(&p_cc->mad_queue)) {
794                 p_madw = (osm_madw_t *) cl_qlist_remove_head(&p_cc->mad_queue);
795                 osm_mad_pool_put(p_cc->mad_pool, p_madw);
796         }
797
798         cl_spinlock_release(&p_cc->mad_queue_lock);
799
800         cl_spinlock_destroy(&p_cc->mad_queue_lock);
801
802         cl_event_destroy(&p_cc->cc_poller_wakeup);
803         cl_event_destroy(&p_cc->outstanding_mads_done_event);
804         cl_event_destroy(&p_cc->sig_mads_on_wire_continue);
805
806         OSM_LOG_EXIT(p_cc->log);
807 }