2 * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 #include <dev/mlx5/cmd.h>
32 static const char *mlx5_ib_cong_params_desc[] = {
33 MLX5_IB_CONG_PARAMS(MLX5_IB_STATS_DESC)
36 static const char *mlx5_ib_cong_stats_desc[] = {
37 MLX5_IB_CONG_STATS(MLX5_IB_STATS_DESC)
/* Index of "field" when struct mlx5_ib_congestion is viewed as a u64 array. */
#define MLX5_IB_INDEX(field) (__offsetof(struct mlx5_ib_congestion, field) / sizeof(u64))
/* Largest value that fits in the firmware bit-field "field" of "type". */
#define MLX5_IB_FLD_MAX(type, field) ((1ULL << __mlx5_bit_sz(type, field)) - 1ULL)
/*
 * Store "var" into the firmware layout, clamping it to the field's
 * maximum first so oversized sysctl input cannot spill into adjacent
 * bits of the command buffer.
 */
#define MLX5_IB_SET_CLIPPED(type, ptr, field, var) do { \
	/* rangecheck */ \
	if ((var) > MLX5_IB_FLD_MAX(type, field)) \
		(var) = MLX5_IB_FLD_MAX(type, field); \
	/* set value */ \
	MLX5_SET(type, ptr, field, var); \
} while (0)
/* Serialize access to dev->congestion state via its sx(9) lock. */
#define CONG_LOCK(dev) sx_xlock(&(dev)->congestion.lock)
#define CONG_UNLOCK(dev) sx_xunlock(&(dev)->congestion.lock)
#define CONG_LOCKED(dev) sx_xlocked(&(dev)->congestion.lock)
/*
 * field_select bits for MODIFY_CONG_PARAMS on the reaction point (RP);
 * bit positions are fixed by the device interface — do not renumber.
 */
#define MLX5_IB_RP_CLAMP_TGT_RATE_ATTR BIT(1)
#define MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR BIT(2)
#define MLX5_IB_RP_TIME_RESET_ATTR BIT(3)
#define MLX5_IB_RP_BYTE_RESET_ATTR BIT(4)
#define MLX5_IB_RP_THRESHOLD_ATTR BIT(5)
#define MLX5_IB_RP_AI_RATE_ATTR BIT(7)
#define MLX5_IB_RP_HAI_RATE_ATTR BIT(8)
#define MLX5_IB_RP_MIN_DEC_FAC_ATTR BIT(9)
#define MLX5_IB_RP_MIN_RATE_ATTR BIT(10)
#define MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR BIT(11)
#define MLX5_IB_RP_DCE_TCP_G_ATTR BIT(12)
#define MLX5_IB_RP_DCE_TCP_RTT_ATTR BIT(13)
#define MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR BIT(14)
#define MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR BIT(15)
#define MLX5_IB_RP_GD_ATTR BIT(16)
/* field_select bits for MODIFY_CONG_PARAMS on the notification point (NP). */
#define MLX5_IB_NP_CNP_DSCP_ATTR BIT(3)
#define MLX5_IB_NP_CNP_PRIO_MODE_ATTR BIT(4)
/* RoCE ECN congestion protocol selector used by the firmware commands. */
enum mlx5_ib_cong_node_type {
	MLX5_IB_RROCE_ECN_RP = 1,	/* reaction point (rate limiter) */
	MLX5_IB_RROCE_ECN_NP = 2,	/* notification point (CNP sender) */
};
78 static enum mlx5_ib_cong_node_type
79 mlx5_ib_param_to_node(u32 index)
82 if (index >= MLX5_IB_INDEX(rp_clamp_tgt_rate) &&
83 index <= MLX5_IB_INDEX(rp_gd))
84 return MLX5_IB_RROCE_ECN_RP;
86 return MLX5_IB_RROCE_ECN_NP;
90 mlx5_get_cc_param_val(void *field, u32 index)
94 case MLX5_IB_INDEX(rp_clamp_tgt_rate):
95 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
97 case MLX5_IB_INDEX(rp_clamp_tgt_rate_ati):
98 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
99 clamp_tgt_rate_after_time_inc);
100 case MLX5_IB_INDEX(rp_time_reset):
101 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
103 case MLX5_IB_INDEX(rp_byte_reset):
104 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
106 case MLX5_IB_INDEX(rp_threshold):
107 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
109 case MLX5_IB_INDEX(rp_ai_rate):
110 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
112 case MLX5_IB_INDEX(rp_hai_rate):
113 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
115 case MLX5_IB_INDEX(rp_min_dec_fac):
116 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
118 case MLX5_IB_INDEX(rp_min_rate):
119 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
121 case MLX5_IB_INDEX(rp_rate_to_set_on_first_cnp):
122 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
123 rate_to_set_on_first_cnp);
124 case MLX5_IB_INDEX(rp_dce_tcp_g):
125 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
127 case MLX5_IB_INDEX(rp_dce_tcp_rtt):
128 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
130 case MLX5_IB_INDEX(rp_rate_reduce_monitor_period):
131 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
132 rate_reduce_monitor_period);
133 case MLX5_IB_INDEX(rp_initial_alpha_value):
134 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
135 initial_alpha_value);
136 case MLX5_IB_INDEX(rp_gd):
137 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
139 case MLX5_IB_INDEX(np_cnp_dscp):
140 return MLX5_GET(cong_control_r_roce_ecn_np, field,
142 case MLX5_IB_INDEX(np_cnp_prio_mode):
143 return MLX5_GET(cong_control_r_roce_ecn_np, field,
145 case MLX5_IB_INDEX(np_cnp_prio):
146 return MLX5_GET(cong_control_r_roce_ecn_np, field,
154 mlx5_ib_set_cc_param_mask_val(void *field, u32 index,
155 u64 var, u32 *attr_mask)
159 case MLX5_IB_INDEX(rp_clamp_tgt_rate):
160 *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATTR;
161 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
162 clamp_tgt_rate, var);
164 case MLX5_IB_INDEX(rp_clamp_tgt_rate_ati):
165 *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR;
166 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
167 clamp_tgt_rate_after_time_inc, var);
169 case MLX5_IB_INDEX(rp_time_reset):
170 *attr_mask |= MLX5_IB_RP_TIME_RESET_ATTR;
171 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
172 rpg_time_reset, var);
174 case MLX5_IB_INDEX(rp_byte_reset):
175 *attr_mask |= MLX5_IB_RP_BYTE_RESET_ATTR;
176 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
177 rpg_byte_reset, var);
179 case MLX5_IB_INDEX(rp_threshold):
180 *attr_mask |= MLX5_IB_RP_THRESHOLD_ATTR;
181 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
184 case MLX5_IB_INDEX(rp_ai_rate):
185 *attr_mask |= MLX5_IB_RP_AI_RATE_ATTR;
186 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
189 case MLX5_IB_INDEX(rp_hai_rate):
190 *attr_mask |= MLX5_IB_RP_HAI_RATE_ATTR;
191 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
194 case MLX5_IB_INDEX(rp_min_dec_fac):
195 *attr_mask |= MLX5_IB_RP_MIN_DEC_FAC_ATTR;
196 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
197 rpg_min_dec_fac, var);
199 case MLX5_IB_INDEX(rp_min_rate):
200 *attr_mask |= MLX5_IB_RP_MIN_RATE_ATTR;
201 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
204 case MLX5_IB_INDEX(rp_rate_to_set_on_first_cnp):
205 *attr_mask |= MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR;
206 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
207 rate_to_set_on_first_cnp, var);
209 case MLX5_IB_INDEX(rp_dce_tcp_g):
210 *attr_mask |= MLX5_IB_RP_DCE_TCP_G_ATTR;
211 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
214 case MLX5_IB_INDEX(rp_dce_tcp_rtt):
215 *attr_mask |= MLX5_IB_RP_DCE_TCP_RTT_ATTR;
216 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
219 case MLX5_IB_INDEX(rp_rate_reduce_monitor_period):
220 *attr_mask |= MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR;
221 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
222 rate_reduce_monitor_period, var);
224 case MLX5_IB_INDEX(rp_initial_alpha_value):
225 *attr_mask |= MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR;
226 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
227 initial_alpha_value, var);
229 case MLX5_IB_INDEX(rp_gd):
230 *attr_mask |= MLX5_IB_RP_GD_ATTR;
231 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
234 case MLX5_IB_INDEX(np_cnp_dscp):
235 *attr_mask |= MLX5_IB_NP_CNP_DSCP_ATTR;
236 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_np, field, cnp_dscp, var);
238 case MLX5_IB_INDEX(np_cnp_prio_mode):
239 *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
240 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_np, field, cnp_prio_mode, var);
242 case MLX5_IB_INDEX(np_cnp_prio):
243 *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
244 MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, 0);
245 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_np, field, cnp_802p_prio, var);
253 mlx5_ib_get_all_cc_params(struct mlx5_ib_dev *dev)
255 int outlen = MLX5_ST_SZ_BYTES(query_cong_params_out);
256 enum mlx5_ib_cong_node_type node = 0;
262 out = kzalloc(outlen, GFP_KERNEL);
266 /* get the current values */
267 for (x = 0; x != MLX5_IB_CONG_PARAMS_NUM; x++) {
268 if (node != mlx5_ib_param_to_node(x)) {
269 node = mlx5_ib_param_to_node(x);
271 err = mlx5_cmd_query_cong_params(dev->mdev, node, out, outlen);
275 field = MLX5_ADDR_OF(query_cong_params_out, out, congestion_parameters);
276 dev->congestion.arg[x] = mlx5_get_cc_param_val(field, x);
283 mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, u32 index, u64 var)
285 int inlen = MLX5_ST_SZ_BYTES(modify_cong_params_in);
286 enum mlx5_ib_cong_node_type node;
292 in = kzalloc(inlen, GFP_KERNEL);
296 MLX5_SET(modify_cong_params_in, in, opcode,
297 MLX5_CMD_OP_MODIFY_CONG_PARAMS);
299 node = mlx5_ib_param_to_node(index);
300 MLX5_SET(modify_cong_params_in, in, cong_protocol, node);
302 field = MLX5_ADDR_OF(modify_cong_params_in, in, congestion_parameters);
303 mlx5_ib_set_cc_param_mask_val(field, index, var, &attr_mask);
305 field = MLX5_ADDR_OF(modify_cong_params_in, in, field_select);
306 MLX5_SET(field_select_r_roce_rp, field, field_select_r_roce_rp,
309 err = mlx5_cmd_modify_cong_params(dev->mdev, in, inlen);
316 mlx5_ib_cong_params_handler(SYSCTL_HANDLER_ARGS)
318 struct mlx5_ib_dev *dev = arg1;
323 value = dev->congestion.arg[arg2];
325 error = sysctl_handle_64(oidp, &value, 0, req);
326 if (error || req->newptr == NULL ||
327 value == dev->congestion.arg[arg2])
330 /* assign new value */
331 dev->congestion.arg[arg2] = value;
335 if (!MLX5_CAP_GEN(dev->mdev, cc_modify_allowed))
338 error = -mlx5_ib_set_cc_params(dev, MLX5_IB_INDEX(arg[arg2]),
339 dev->congestion.arg[arg2]);
/*
 * Assemble a 64-bit counter from the firmware's split _high/_low
 * 32-bit fields (the layout is not 64-bit aligned).
 */
#define MLX5_GET_UNALIGNED_64(t,p,f) \
    (((u64)MLX5_GET(t,p,f##_high) << 32) | MLX5_GET(t,p,f##_low))
351 mlx5_ib_read_cong_stats(struct work_struct *work)
353 struct mlx5_ib_dev *dev =
354 container_of(work, struct mlx5_ib_dev, congestion.dwork.work);
355 const int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
358 out = kzalloc(outlen, GFP_KERNEL);
363 if (mlx5_cmd_query_cong_counter(dev->mdev, 0, out, outlen))
364 memset(out, 0, outlen);
366 dev->congestion.syndrome =
367 MLX5_GET(query_cong_statistics_out, out, syndrome);
368 dev->congestion.rp_cur_flows =
369 MLX5_GET(query_cong_statistics_out, out, rp_cur_flows);
370 dev->congestion.sum_flows =
371 MLX5_GET(query_cong_statistics_out, out, sum_flows);
372 dev->congestion.rp_cnp_ignored =
373 MLX5_GET_UNALIGNED_64(query_cong_statistics_out, out, rp_cnp_ignored);
374 dev->congestion.rp_cnp_handled =
375 MLX5_GET_UNALIGNED_64(query_cong_statistics_out, out, rp_cnp_handled);
376 dev->congestion.time_stamp =
377 MLX5_GET_UNALIGNED_64(query_cong_statistics_out, out, time_stamp);
378 dev->congestion.accumulators_period =
379 MLX5_GET(query_cong_statistics_out, out, accumulators_period);
380 dev->congestion.np_ecn_marked_roce_packets =
381 MLX5_GET_UNALIGNED_64(query_cong_statistics_out, out, np_ecn_marked_roce_packets);
382 dev->congestion.np_cnp_sent =
383 MLX5_GET_UNALIGNED_64(query_cong_statistics_out, out, np_cnp_sent);
389 schedule_delayed_work(&dev->congestion.dwork, hz);
393 mlx5_ib_cleanup_congestion(struct mlx5_ib_dev *dev)
396 while (cancel_delayed_work_sync(&dev->congestion.dwork))
398 sysctl_ctx_free(&dev->congestion.ctx);
399 sx_destroy(&dev->congestion.lock);
403 mlx5_ib_init_congestion(struct mlx5_ib_dev *dev)
405 struct sysctl_ctx_list *ctx;
406 struct sysctl_oid *parent;
407 struct sysctl_oid *node;
411 ctx = &dev->congestion.ctx;
412 sysctl_ctx_init(ctx);
413 sx_init(&dev->congestion.lock, "mlx5ibcong");
414 INIT_DELAYED_WORK(&dev->congestion.dwork, mlx5_ib_read_cong_stats);
416 if (!MLX5_CAP_GEN(dev->mdev, cc_query_allowed))
419 err = mlx5_ib_get_all_cc_params(dev);
423 parent = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(dev->ib_dev.dev.kobj.oidp),
424 OID_AUTO, "cong", CTLFLAG_RW, NULL, "Congestion control");
428 node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(parent),
429 OID_AUTO, "conf", CTLFLAG_RW, NULL, "Configuration");
431 sysctl_ctx_free(&dev->congestion.ctx);
435 for (x = 0; x != MLX5_IB_CONG_PARAMS_NUM; x++) {
437 SYSCTL_CHILDREN(node), OID_AUTO,
438 mlx5_ib_cong_params_desc[2 * x],
439 CTLTYPE_U64 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
440 dev, x, &mlx5_ib_cong_params_handler, "QU",
441 mlx5_ib_cong_params_desc[2 * x + 1]);
444 node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(parent),
445 OID_AUTO, "stats", CTLFLAG_RD, NULL, "Statistics");
447 sysctl_ctx_free(&dev->congestion.ctx);
451 for (x = 0; x != MLX5_IB_CONG_STATS_NUM; x++) {
452 /* read-only SYSCTLs */
453 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
454 mlx5_ib_cong_stats_desc[2 * x],
455 CTLFLAG_RD | CTLFLAG_MPSAFE,
456 &dev->congestion.arg[x + MLX5_IB_CONG_PARAMS_NUM],
457 0, mlx5_ib_cong_stats_desc[2 * x + 1]);
459 schedule_delayed_work(&dev->congestion.dwork, hz);