2 * Copyright (c) 2006-2009 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2010-2015 Mellanox Technologies LTD. All rights reserved.
4 * Copyright (c) 2009 HNR Consulting. All rights reserved.
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
38 * Implementation of OpenSM QoS infrastructure primitives
43 #endif /* HAVE_CONFIG_H */
48 #include <iba/ib_types.h>
49 #include <complib/cl_qmap.h>
50 #include <complib/cl_debug.h>
51 #include <opensm/osm_file_ids.h>
52 #define FILE_ID OSM_FILE_QOS_C
53 #include <opensm/osm_opensm.h>
54 #include <opensm/osm_subnet.h>
55 #include <opensm/osm_qos_policy.h>
/* Per-port-type QoS settings (fields of struct qos_config; the struct
 * header is not visible in this excerpt). */
59 uint8_t vl_high_limit;	/* VL high-priority limit value for the port */
60 ib_vl_arb_table_t vlarb_high[2];	/* high-priority VL arbitration, two blocks */
61 ib_vl_arb_table_t vlarb_low[2];	/* low-priority VL arbitration, two blocks */
62 ib_slvl_table_t sl2vl;	/* SL -> VL mapping table */
/* One queued QoS Set() MAD, linkable into a per-port list. */
65 typedef struct qos_mad_item {
66 cl_list_item_t list_item;	/* linkage inside a port's MAD list */
/* Per-port queue of pending QoS MADs, itself linkable into the global
 * list built by osm_qos_setup(). */
70 typedef struct qos_mad_list {
71 cl_list_item_t list_item;	/* linkage inside the global QoS MAD list */
72 cl_qlist_t port_mad_list;	/* MADs still to be sent to this port */
/* Forward declaration: fill *cfg from the per-port-type options opt,
 * falling back to the global defaults dflt (defined near the bottom of
 * this file). */
75 static void qos_build_config(struct qos_config *cfg,
76 osm_qos_options_t * opt,
77 osm_qos_options_t * dflt);
/*
 * Allocate a qos_mad_item_t and prepare (but do not send) a Set()
 * request MAD for the given attribute on physical port p.  Fills the
 * dispatcher context for the attribute and picks the proper M_Key.
 * Presumably returns NULL on failure — not every return path is
 * visible in this excerpt.
 */
83 static qos_mad_item_t *osm_qos_mad_create(IN osm_sm_t * sm,
85 IN uint32_t data_size,
87 IN ib_net16_t attr_id,
91 qos_mad_item_t *p_mad;
92 osm_madw_context_t context;
98 p_node = osm_physp_get_node_ptr(p);
/* For a switch external port the M_Key is taken from port 0's
 * PortInfo; otherwise from the port's own PortInfo. */
99 if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH &&
100 osm_physp_get_port_num(p) != 0) {
101 physp0 = osm_node_get_physp_ptr(p_node, 0);
102 m_key = ib_port_info_get_m_key(&physp0->port_info);
104 m_key = ib_port_info_get_m_key(&p->port_info);
/* Fill the per-attribute context so the receive path can match the
 * response back to this node/port. */
107 case IB_MAD_ATTR_SLVL_TABLE:
108 context.slvl_context.node_guid = osm_node_get_node_guid(p_node);
109 context.slvl_context.port_guid = osm_physp_get_port_guid(p);
110 context.slvl_context.set_method = TRUE;
112 case IB_MAD_ATTR_VL_ARBITRATION:
113 context.vla_context.node_guid = osm_node_get_node_guid(p_node);
114 context.vla_context.port_guid = osm_physp_get_port_guid(p);
115 context.vla_context.set_method = TRUE;
/* Allocate and zero the list item that will carry the MAD wrapper. */
121 p_mad = (qos_mad_item_t *) malloc(sizeof(*p_mad));
125 memset(p_mad, 0, sizeof(*p_mad));
/* Prepare the directed-route Set() request; sending happens later in
 * osm_qos_setup()'s drain loop. */
127 p_madw = osm_prepare_req_set(sm, osm_physp_get_dr_path_ptr(p),
129 attr_id, cl_hton32(attr_mod),
131 CL_DISP_MSGID_NONE, &context);
133 if (p_madw == NULL) {
137 p_mad->p_madw = p_madw;
/* Release a queued QoS MAD item.  Takes a pointer-to-pointer,
 * presumably so the caller's pointer can be cleared after freeing —
 * NOTE(review): the body is not visible in this excerpt; confirm. */
141 static void osm_qos_mad_delete(qos_mad_item_t ** p_item)
/*
 * Queue a Set(VLArbitration) MAD for one table block of port p, unless
 * the cached copy already matches and no update is forced.  The active
 * routing engine may first tailor the block via its update_vlarb hook.
 * The queued MAD goes onto mad_list; returns IB_INSUFFICIENT_MEMORY if
 * the MAD cannot be created.
 */
147 static ib_api_status_t vlarb_update_table_block(osm_sm_t * sm,
150 unsigned force_update,
151 const ib_vl_arb_table_t *
153 unsigned block_length,
155 cl_qlist_t *mad_list)
157 struct osm_routing_engine *re = sm->p_subn->p_osm->routing_engine_used;
158 ib_vl_arb_table_t block;
161 qos_mad_item_t *p_mad;
/* Mask limiting VL numbers to the port's operational VLs (OperationalVLs
 * is a power-of-two count encoding, hence the shift). */
162 vl_mask = (1 << (ib_port_info_get_op_vls(&p->port_info) - 1)) - 1;
164 memset(&block, 0, sizeof(block));
165 memcpy(&block, table_block, block_length * sizeof(block.vl_entry[0]));
/* Give the routing engine a chance to override this block per port. */
167 if (re && re->update_vlarb)
168 re->update_vlarb(re->context, p, port_num, &block,
169 block_length, block_num);
/* Clamp each entry's VL into the operational range. */
171 for (i = 0; i < block_length; i++)
172 block.vl_entry[i].vl &= vl_mask;
/* Skip the MAD when the stored block already matches (unless forced). */
175 !memcmp(&p->vl_arb[block_num], &block,
176 block_length * sizeof(block.vl_entry[0])))
/* Attribute modifier: 1-based block number in the upper 16 bits,
 * output port number in the lower bits. */
179 attr_mod = ((block_num + 1) << 16) | port_num;
181 p_mad = osm_qos_mad_create(sm,p,sizeof(block),(uint8_t *) & block,
182 IB_MAD_ATTR_VL_ARBITRATION, attr_mod);
185 return IB_INSUFFICIENT_MEMORY;
188 * Zero the stored VL Arbitration block, so in case the MAD will
189 * end up with error, we will resend it in the next sweep.
191 memset(&p->vl_arb[block_num], 0,
192 block_length * sizeof(block.vl_entry[0]));
194 cl_qlist_insert_tail(mad_list, &p_mad->list_item);
/*
 * Queue Set(VLArbitration) MADs covering the port's low- and
 * high-priority tables.  Each table takes one block, plus a second
 * block for the remainder when its capability exceeds
 * IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK.  Stops and returns the first
 * failing status.
 */
199 static ib_api_status_t vlarb_update(osm_sm_t * sm, osm_physp_t * p,
200 uint8_t port_num, unsigned force_update,
201 const struct qos_config *qcfg,
202 cl_qlist_t *mad_list)
204 ib_api_status_t status = IB_SUCCESS;
205 ib_port_info_t *p_pi = &p->port_info;
/* Low-priority table, first block (length capped at one block). */
208 if (p_pi->vl_arb_low_cap > 0) {
209 len = p_pi->vl_arb_low_cap < IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK ?
210 p_pi->vl_arb_low_cap : IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK;
211 if ((status = vlarb_update_table_block(sm, p, port_num,
215 mad_list)) != IB_SUCCESS)
/* Low-priority table, remainder block. */
218 if (p_pi->vl_arb_low_cap > IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK) {
219 len = p_pi->vl_arb_low_cap % IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK;
220 if ((status = vlarb_update_table_block(sm, p, port_num,
224 mad_list)) != IB_SUCCESS)
/* High-priority table, first block. */
227 if (p_pi->vl_arb_high_cap > 0) {
228 len = p_pi->vl_arb_high_cap < IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK ?
229 p_pi->vl_arb_high_cap : IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK;
230 if ((status = vlarb_update_table_block(sm, p, port_num,
232 &qcfg->vlarb_high[0],
234 mad_list)) != IB_SUCCESS)
/* High-priority table, remainder block. */
237 if (p_pi->vl_arb_high_cap > IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK) {
238 len = p_pi->vl_arb_high_cap % IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK;
239 if ((status = vlarb_update_table_block(sm, p, port_num,
241 &qcfg->vlarb_high[1],
243 mad_list)) != IB_SUCCESS)
/*
 * Queue a Set(SLtoVLMappingTable) MAD for input port in_port of p,
 * unless the cached table already matches and no update is forced.
 * VL values are clamped to the port's operational VLs.  Returns
 * IB_INSUFFICIENT_MEMORY if the MAD cannot be created.
 */
250 static ib_api_status_t sl2vl_update_table(osm_sm_t * sm, osm_physp_t * p,
251 uint8_t in_port, uint32_t attr_mod,
252 unsigned force_update,
253 const ib_slvl_table_t * sl2vl_table,
254 cl_qlist_t *mad_list)
256 ib_slvl_table_t tbl, *p_tbl;
260 qos_mad_item_t *p_mad;
/* Mask limiting VL numbers to the port's operational VLs. */
262 vl_mask = (1 << (ib_port_info_get_op_vls(&p->port_info) - 1)) - 1;
/* Each raw byte packs two SL entries: high nibble then low nibble. */
264 for (i = 0; i < IB_MAX_NUM_VLS / 2; i++) {
265 vl1 = sl2vl_table->raw_vl_by_sl[i] >> 4;
266 vl2 = sl2vl_table->raw_vl_by_sl[i] & 0xf;
271 tbl.raw_vl_by_sl[i] = (vl1 << 4) | vl2;
274 p_tbl = osm_physp_get_slvl_tbl(p, in_port);
/* Skip the MAD when the stored table already matches (unless forced). */
276 if (!force_update && !memcmp(p_tbl, &tbl, sizeof(tbl)))
279 p_mad = osm_qos_mad_create(sm, p, sizeof(tbl), (uint8_t *) & tbl,
280 IB_MAD_ATTR_SLVL_TABLE, attr_mod);
282 return IB_INSUFFICIENT_MEMORY;
285 * Zero the stored SL2VL block, so in case the MAD will
286 * end up with error, we will resend it in the next sweep.
288 memset(p_tbl, 0, sizeof(tbl));
290 cl_qlist_insert_tail(mad_list, &p_mad->list_item);
/*
 * Configure QoS on a switch node: VL arbitration on every external
 * port, then SL2VL.  When the switch supports the optimized SL2VL
 * mapping and the option is enabled, one "all ports" Set is issued
 * (attribute modifier 0x30000, presumably meaning all input/output
 * ports) based on the most common OperationalVLs, with per-port
 * overrides where a port's OperationalVLs differ.  Otherwise each
 * (input, output) port pair is programmed individually.  Presumably
 * returns nonzero on failure; not every path is visible here.
 */
294 static int qos_extports_setup(osm_sm_t * sm, osm_node_t *node,
295 const struct qos_config *qcfg,
296 cl_qlist_t *port_mad_list)
300 unsigned force_update;
301 unsigned num_ports = osm_node_get_num_physp(node);
302 struct osm_routing_engine *re = sm->p_subn->p_osm->routing_engine_used;
305 uint8_t op_vl, common_op_vl = 0, max_num = 0;
306 uint8_t op_vl_arr[15];
309 * Do nothing unless the most recent routing attempt was successful.
/* First pass: VL arbitration on every external port that is not down. */
314 for (out = 1; out < num_ports; out++) {
315 p = osm_node_get_physp_ptr(node, out);
318 if (ib_port_info_get_port_state(&p->port_info) == IB_LINK_DOWN)
320 force_update = p->need_update || sm->p_subn->need_update;
321 p->vl_high_limit = qcfg->vl_high_limit;
322 if (vlarb_update(sm, p, p->port_num, force_update, qcfg,
/* SL2VL requires the SL mapping capability on port 0. */
327 p0 = osm_node_get_physp_ptr(node, 0);
328 if (!(p0->port_info.capability_mask & IB_PORT_CAP_HAS_SL_MAP))
/* Optimized path: one Set covers all ports; only usable when the
 * routing engine has no per-port sl2vl hook. */
331 if (ib_switch_info_get_opt_sl2vlmapping(&node->sw->switch_info) &&
332 sm->p_subn->opt.use_optimized_slvl && !re->update_sl2vl) {
334 /* we should find the op_vl that is used by majority of ports */
335 memset(&op_vl_arr[0], 0, sizeof(op_vl_arr));
336 p0 = osm_node_get_physp_ptr(node, 1);
338 for (out = 1; out < num_ports; out++) {
339 p = osm_node_get_physp_ptr(node, out);
342 if (ib_port_info_get_port_state(&p->port_info) ==
345 op_vl = ib_port_info_get_op_vls(&p->port_info);
347 if (op_vl_arr[op_vl] > max_num){
348 max_num = op_vl_arr[op_vl];
349 common_op_vl = op_vl;
350 /* remember the port with most common op_vl */
/* Program all ports at once through the representative port p0. */
357 force_update = node->sw->need_update || sm->p_subn->need_update;
358 if (sl2vl_update_table(sm, p0, p0->port_num, 0x30000, force_update,
359 &qcfg->sl2vl, port_mad_list))
362 * Overwrite default ALL configuration if port's
363 * op_vl is different.
365 for (out = 1; out < num_ports; out++) {
366 p = osm_node_get_physp_ptr(node, out);
369 if (ib_port_info_get_port_state(&p->port_info) ==
373 force_update = p->need_update || force_update;
374 if (ib_port_info_get_op_vls(&p->port_info) !=
376 sl2vl_update_table(sm, p, p->port_num, 0x20000 | out,
377 force_update, &qcfg->sl2vl,
384 /* non optimized sl2vl configuration */
/* Start from port 0 only when it is an enhanced switch port 0. */
385 out = ib_switch_info_is_enhanced_port0(&node->sw->switch_info) ? 0 : 1;
386 for (; out < num_ports; out++) {
387 p = osm_node_get_physp_ptr(node, out);
390 if (ib_port_info_get_port_state(&p->port_info) == IB_LINK_DOWN)
392 force_update = p->need_update || sm->p_subn->need_update;
393 /* go over all in ports */
394 for (in = 0; in < num_ports; in++) {
395 const ib_slvl_table_t *port_sl2vl = &qcfg->sl2vl;
396 ib_slvl_table_t routing_sl2vl;
/* Let the routing engine adjust the mapping for this (in,out) pair. */
398 if (re->update_sl2vl) {
399 routing_sl2vl = *port_sl2vl;
400 re->update_sl2vl(re->context,
401 p, in, out, &routing_sl2vl);
402 port_sl2vl = &routing_sl2vl;
404 if (sl2vl_update_table(sm, p, in, in << 8 | out,
405 force_update, port_sl2vl,
/*
 * Configure QoS on a single end port (CA, router, or enhanced switch
 * port 0): program VL arbitration, then the SL2VL table (letting the
 * routing engine adjust the mapping first).  vlarb_only presumably
 * suppresses the SL2VL stage; its check is not visible in this
 * excerpt.
 */
414 static int qos_endport_setup(osm_sm_t * sm, osm_physp_t * p,
415 const struct qos_config *qcfg, int vlarb_only,
416 cl_qlist_t *port_mad_list)
418 unsigned force_update = p->need_update || sm->p_subn->need_update;
419 struct osm_routing_engine *re = sm->p_subn->p_osm->routing_engine_used;
420 const ib_slvl_table_t *port_sl2vl = &qcfg->sl2vl;
421 ib_slvl_table_t routing_sl2vl;
423 p->vl_high_limit = qcfg->vl_high_limit;
424 if (vlarb_update(sm, p, 0, force_update, qcfg, port_mad_list))
/* SL2VL only applies when the port advertises the SL map capability. */
429 if (!(p->port_info.capability_mask & IB_PORT_CAP_HAS_SL_MAP))
432 if (re && re->update_sl2vl) {
433 routing_sl2vl = *port_sl2vl;
434 re->update_sl2vl(re->context, p, 0, 0, &routing_sl2vl);
435 port_sl2vl = &routing_sl2vl;
437 if (sl2vl_update_table(sm, p, 0, 0, force_update, port_sl2vl,
/*
 * Public entry point: push QoS configuration (VL arbitration and SL2VL
 * tables) to every port in the subnet.  Builds per-node-type configs
 * (CA, switch port 0, switch external, router), parses the QoS policy
 * file, queues the required Set() MADs per port under the subnet lock,
 * then drains the queues round-robin — one MAD per port per pass — so
 * no single port is flooded with requests.
 */
444 int osm_qos_setup(osm_opensm_t * p_osm)
446 struct qos_config ca_config, sw0_config, swe_config, rtr_config;
447 struct qos_config *cfg;
449 cl_map_item_t *p_next;
454 qos_mad_list_t *p_list, *p_list_next;
455 qos_mad_item_t *p_port_mad;
456 cl_qlist_t qos_mad_list;
/* QoS disabled in the options — nothing to do. */
458 if (!p_osm->subn.opt.qos)
461 OSM_LOG_ENTER(&p_osm->log);
/* Build each per-type config, falling back to the global qos_options. */
463 qos_build_config(&ca_config, &p_osm->subn.opt.qos_ca_options,
464 &p_osm->subn.opt.qos_options);
465 qos_build_config(&sw0_config, &p_osm->subn.opt.qos_sw0_options,
466 &p_osm->subn.opt.qos_options);
467 qos_build_config(&swe_config, &p_osm->subn.opt.qos_swe_options,
468 &p_osm->subn.opt.qos_options);
469 qos_build_config(&rtr_config, &p_osm->subn.opt.qos_rtr_options,
470 &p_osm->subn.opt.qos_options);
472 cl_qlist_init(&qos_mad_list);
474 cl_plock_excl_acquire(&p_osm->lock);
476 /* read QoS policy config file */
477 osm_qos_parse_policy_file(&p_osm->subn);
/* Walk every port in the subnet and queue its QoS MADs. */
478 p_tbl = &p_osm->subn.port_guid_tbl;
479 p_next = cl_qmap_head(p_tbl);
480 while (p_next != cl_qmap_end(p_tbl)) {
482 p_port = (osm_port_t *) p_next;
483 p_next = cl_qmap_next(p_next);
485 p_list = (qos_mad_list_t *) malloc(sizeof(*p_list));
487 cl_plock_release(&p_osm->lock);
491 memset(p_list, 0, sizeof(*p_list));
493 cl_qlist_init(&p_list->port_mad_list);
495 p_node = p_port->p_node;
/* Switch: configure all external ports, then (enhanced) port 0. */
497 if (qos_extports_setup(&p_osm->sm, p_node, &swe_config,
498 &p_list->port_mad_list)) {
499 cl_plock_release(&p_osm->lock);
503 /* skip base port 0 */
504 if (!ib_switch_info_is_enhanced_port0
505 (&p_node->sw->switch_info))
/* If the optimized SL2VL path already covered port 0 with an
 * identical table, only VL arbitration remains to be set. */
508 if (ib_switch_info_get_opt_sl2vlmapping(&p_node->sw->switch_info) &&
509 p_osm->sm.p_subn->opt.use_optimized_slvl &&
510 !memcmp(&swe_config.sl2vl, &sw0_config.sl2vl,
511 sizeof(swe_config.sl2vl)))
515 } else if (osm_node_get_type(p_node) == IB_NODE_TYPE_ROUTER)
520 if (qos_endport_setup(&p_osm->sm, p_port->p_physp, cfg,
521 vlarb_only, &p_list->port_mad_list)) {
522 cl_plock_release(&p_osm->lock);
526 /* if MAD list is not empty, add it to the global MAD list */
527 if (cl_qlist_count(&p_list->port_mad_list)) {
528 cl_qlist_insert_tail(&qos_mad_list, &p_list->list_item);
/* Drain: each outer pass sends one MAD per port (round-robin), removing
 * a port's list from the global list once it is empty. */
533 while (cl_qlist_count(&qos_mad_list)) {
534 p_list_next = (qos_mad_list_t *) cl_qlist_head(&qos_mad_list);
535 while (p_list_next !=
536 (qos_mad_list_t *) cl_qlist_end(&qos_mad_list)) {
537 p_list = p_list_next;
538 p_list_next = (qos_mad_list_t *)
539 cl_qlist_next(&p_list->list_item);
540 /* next MAD to send*/
541 p_port_mad = (qos_mad_item_t *)
542 cl_qlist_remove_head(&p_list->port_mad_list);
543 osm_send_req_mad(&p_osm->sm, p_port_mad->p_madw);
544 osm_qos_mad_delete(&p_port_mad);
545 /* remove the QoS MAD from global MAD list */
546 if (cl_qlist_count(&p_list->port_mad_list) == 0) {
547 cl_qlist_remove_item(&qos_mad_list, &p_list->list_item);
553 cl_plock_release(&p_osm->lock);
554 OSM_LOG_EXIT(&p_osm->log);
/*
 * Parse one unsigned integer at the start of str (any base accepted by
 * strtoul) into *val.  Returns the number of characters consumed.
 * The delim parameter is presumably used to step past a trailing
 * delimiter; that code is not visible in this excerpt.
 */
562 static int parse_one_unsigned(const char *str, char delim, unsigned *val)
565 *val = strtoul(str, &end, 0);
568 return (int)(end - str);
/*
 * Parse one "vl:weight" VL-arbitration element from str into *e.
 * Returns the number of characters consumed.
 */
571 static int parse_vlarb_entry(const char *str, ib_vl_arb_element_t * e)
575 p += parse_one_unsigned(p, ':', &val);	/* VL number, up to ':' */
577 p += parse_one_unsigned(p, ',', &val);	/* weight, up to ',' */
578 e->weight = (uint8_t) val;
579 return (int)(p - str);
/*
 * Parse two comma-separated VL values and pack them into one raw SL2VL
 * byte: first value in the high nibble, second in the low nibble.
 * Returns the number of characters consumed.
 */
582 static int parse_sl2vl_entry(const char *str, uint8_t * raw)
586 p += parse_one_unsigned(p, ',', &val1);
587 p += parse_one_unsigned(p, ',', &val2);
588 *raw = (val1 << 4) | (val2 & 0xf);
589 return (int)(p - str);
/*
 * Populate *cfg from the per-port-type options opt, falling back to
 * the global defaults dflt and finally to the compile-time
 * OSM_DEFAULT_QOS_* values for each field: max VLs, VL high limit,
 * the high/low VL arbitration tables, and the SL2VL map.
 */
592 static void qos_build_config(struct qos_config *cfg, osm_qos_options_t * opt,
593 osm_qos_options_t * dflt)
598 memset(cfg, 0, sizeof(*cfg));
/* max_vls: opt, then dflt, then built-in default. */
600 if (opt->max_vls > 0)
601 cfg->max_vls = opt->max_vls;
603 if (dflt->max_vls > 0)
604 cfg->max_vls = dflt->max_vls;
606 cfg->max_vls = OSM_DEFAULT_QOS_MAX_VLS;
/* vl_high_limit: same fallback chain (negative means unset). */
609 if (opt->high_limit >= 0)
610 cfg->vl_high_limit = (uint8_t) opt->high_limit;
612 if (dflt->high_limit >= 0)
613 cfg->vl_high_limit = (uint8_t) dflt->high_limit;
615 cfg->vl_high_limit = (uint8_t) OSM_DEFAULT_QOS_HIGH_LIMIT;
/* High-priority VL arbitration string: parse two blocks' worth of
 * vl:weight entries into vlarb_high[0..1]. */
621 if (dflt->vlarb_high)
622 p = dflt->vlarb_high;
624 p = OSM_DEFAULT_QOS_VLARB_HIGH;
626 for (i = 0; i < 2 * IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK; i++) {
627 p += parse_vlarb_entry(p,
629 IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK].
631 IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK]);
/* Low-priority VL arbitration string: same parsing into vlarb_low. */
640 p = OSM_DEFAULT_QOS_VLARB_LOW;
642 for (i = 0; i < 2 * IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK; i++) {
643 p += parse_vlarb_entry(p,
645 IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK].
647 IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK]);
/* SL2VL map string: two VLs per raw byte, IB_MAX_NUM_VLS/2 bytes. */
650 p = opt->sl2vl ? opt->sl2vl : dflt->sl2vl;
657 p = OSM_DEFAULT_QOS_SL2VL;
659 for (i = 0; i < IB_MAX_NUM_VLS / 2; i++)
660 p += parse_sl2vl_entry(p, &cfg->sl2vl.raw_vl_by_sl[i]);