]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/mlx5/mlx5_ib/mlx5_ib_cong.c
MFC r347304:
[FreeBSD/FreeBSD.git] / sys / dev / mlx5 / mlx5_ib / mlx5_ib_cong.c
1 /*-
2  * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27
28 #include "mlx5_ib.h"
29
30 #include <dev/mlx5/cmd.h>
31
/*
 * String table for the tunable congestion parameters, generated by
 * expanding MLX5_IB_CONG_PARAMS() with MLX5_IB_STATS_DESC.
 * mlx5_ib_init_congestion() reads it as consecutive pairs: entry
 * [2 * x] is the sysctl name of parameter x and entry [2 * x + 1] is
 * its description.
 */
static const char *mlx5_ib_cong_params_desc[] = {
        MLX5_IB_CONG_PARAMS(MLX5_IB_STATS_DESC)
};
35
/*
 * String table for the read-only congestion statistics, generated by
 * expanding MLX5_IB_CONG_STATS() with MLX5_IB_STATS_DESC.
 * mlx5_ib_init_congestion() reads it as consecutive pairs: entry
 * [2 * x] is the sysctl name of statistic x and entry [2 * x + 1] is
 * its description.
 */
static const char *mlx5_ib_cong_stats_desc[] = {
        MLX5_IB_CONG_STATS(MLX5_IB_STATS_DESC)
};
39
/*
 * Position of "field" inside struct mlx5_ib_congestion, counted in u64
 * units (byte offset divided by sizeof(u64)).  This file uses the
 * result as the parameter index, both for switch labels and for the
 * congestion.arg[] array.
 */
#define MLX5_IB_INDEX(field) (__offsetof(struct mlx5_ib_congestion, field) / sizeof(u64))
/*
 * Largest value representable in the number of bits that
 * __mlx5_bit_sz() reports for "field" of layout "type".
 */
#define MLX5_IB_FLD_MAX(type, field) ((1ULL << __mlx5_bit_sz(type, field)) - 1ULL)
/*
 * Write "var" into "field" of the "type" layout at "ptr", clamping it
 * to MLX5_IB_FLD_MAX() first.  "var" must be a modifiable lvalue: it is
 * overwritten with the clamped value and is evaluated more than once.
 */
#define MLX5_IB_SET_CLIPPED(type, ptr, field, var) do { \
  /* rangecheck */                                      \
  if ((var) > MLX5_IB_FLD_MAX(type, field))             \
        (var) = MLX5_IB_FLD_MAX(type, field);           \
  /* set value */                                       \
  MLX5_SET(type, ptr, field, var);                      \
} while (0)
49
/*
 * Wrappers around the per-device sx lock at (dev)->congestion.lock:
 * take it exclusively, release it, and test whether it is exclusively
 * held.  The sysctl handler and the statistics worker in this file
 * both take this lock around their accesses to dev->congestion.
 */
#define CONG_LOCK(dev) sx_xlock(&(dev)->congestion.lock)
#define CONG_UNLOCK(dev) sx_xunlock(&(dev)->congestion.lock)
#define CONG_LOCKED(dev) sx_xlocked(&(dev)->congestion.lock)
53
/*
 * Field-select bits for the RP parameters.
 * mlx5_ib_set_cc_param_mask_val() ORs the bit of the parameter being
 * changed into an attribute mask, which mlx5_ib_set_cc_params() then
 * writes into the field_select area of the modify command.
 * Bits 0 and 6 have no definition here.
 * NOTE(review): the bit positions presumably mirror the firmware
 * field-select layout - confirm against the device interface spec.
 */
#define MLX5_IB_RP_CLAMP_TGT_RATE_ATTR                  BIT(1)
#define MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR              BIT(2)
#define MLX5_IB_RP_TIME_RESET_ATTR                      BIT(3)
#define MLX5_IB_RP_BYTE_RESET_ATTR                      BIT(4)
#define MLX5_IB_RP_THRESHOLD_ATTR                       BIT(5)
#define MLX5_IB_RP_AI_RATE_ATTR                         BIT(7)
#define MLX5_IB_RP_HAI_RATE_ATTR                        BIT(8)
#define MLX5_IB_RP_MIN_DEC_FAC_ATTR                     BIT(9)
#define MLX5_IB_RP_MIN_RATE_ATTR                        BIT(10)
#define MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR        BIT(11)
#define MLX5_IB_RP_DCE_TCP_G_ATTR                       BIT(12)
#define MLX5_IB_RP_DCE_TCP_RTT_ATTR                     BIT(13)
#define MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR      BIT(14)
#define MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR             BIT(15)
#define MLX5_IB_RP_GD_ATTR                              BIT(16)

/*
 * Field-select bits for the NP parameters.  These reuse bit numbers 3
 * and 4 of the RP set; the two sets are never mixed because each
 * modify command targets a single node type.
 */
#define MLX5_IB_NP_CNP_DSCP_ATTR                        BIT(3)
#define MLX5_IB_NP_CNP_PRIO_MODE_ATTR                   BIT(4)
72
/*
 * Congestion-control node selector.  The value is passed to
 * mlx5_cmd_query_cong_params() when reading parameters and written to
 * the cong_protocol field of the modify command when changing them.
 * NOTE(review): RP/NP presumably stand for "reaction point" and
 * "notification point" - confirm against the device documentation.
 */
enum mlx5_ib_cong_node_type {
        MLX5_IB_RROCE_ECN_RP = 1,
        MLX5_IB_RROCE_ECN_NP = 2,
};
77
78 static enum mlx5_ib_cong_node_type
79 mlx5_ib_param_to_node(u32 index)
80 {
81
82         if (index >= MLX5_IB_INDEX(rp_clamp_tgt_rate) &&
83             index <= MLX5_IB_INDEX(rp_gd))
84                 return MLX5_IB_RROCE_ECN_RP;
85         else
86                 return MLX5_IB_RROCE_ECN_NP;
87 }
88
89 static u64
90 mlx5_get_cc_param_val(void *field, u32 index)
91 {
92
93         switch (index) {
94         case MLX5_IB_INDEX(rp_clamp_tgt_rate):
95                 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
96                                 clamp_tgt_rate);
97         case MLX5_IB_INDEX(rp_clamp_tgt_rate_ati):
98                 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
99                                 clamp_tgt_rate_after_time_inc);
100         case MLX5_IB_INDEX(rp_time_reset):
101                 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
102                                 rpg_time_reset);
103         case MLX5_IB_INDEX(rp_byte_reset):
104                 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
105                                 rpg_byte_reset);
106         case MLX5_IB_INDEX(rp_threshold):
107                 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
108                                 rpg_threshold);
109         case MLX5_IB_INDEX(rp_ai_rate):
110                 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
111                                 rpg_ai_rate);
112         case MLX5_IB_INDEX(rp_hai_rate):
113                 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
114                                 rpg_hai_rate);
115         case MLX5_IB_INDEX(rp_min_dec_fac):
116                 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
117                                 rpg_min_dec_fac);
118         case MLX5_IB_INDEX(rp_min_rate):
119                 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
120                                 rpg_min_rate);
121         case MLX5_IB_INDEX(rp_rate_to_set_on_first_cnp):
122                 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
123                                 rate_to_set_on_first_cnp);
124         case MLX5_IB_INDEX(rp_dce_tcp_g):
125                 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
126                                 dce_tcp_g);
127         case MLX5_IB_INDEX(rp_dce_tcp_rtt):
128                 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
129                                 dce_tcp_rtt);
130         case MLX5_IB_INDEX(rp_rate_reduce_monitor_period):
131                 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
132                                 rate_reduce_monitor_period);
133         case MLX5_IB_INDEX(rp_initial_alpha_value):
134                 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
135                                 initial_alpha_value);
136         case MLX5_IB_INDEX(rp_gd):
137                 return MLX5_GET(cong_control_r_roce_ecn_rp, field,
138                                 rpg_gd);
139         case MLX5_IB_INDEX(np_cnp_dscp):
140                 return MLX5_GET(cong_control_r_roce_ecn_np, field,
141                                 cnp_dscp);
142         case MLX5_IB_INDEX(np_cnp_prio_mode):
143                 return MLX5_GET(cong_control_r_roce_ecn_np, field,
144                                 cnp_prio_mode);
145         case MLX5_IB_INDEX(np_cnp_prio):
146                 return MLX5_GET(cong_control_r_roce_ecn_np, field,
147                                 cnp_802p_prio);
148         default:
149                 return 0;
150         }
151 }
152
153 static void
154 mlx5_ib_set_cc_param_mask_val(void *field, u32 index,
155     u64 var, u32 *attr_mask)
156 {
157
158         switch (index) {
159         case MLX5_IB_INDEX(rp_clamp_tgt_rate):
160                 *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATTR;
161                 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
162                          clamp_tgt_rate, var);
163                 break;
164         case MLX5_IB_INDEX(rp_clamp_tgt_rate_ati):
165                 *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR;
166                 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
167                          clamp_tgt_rate_after_time_inc, var);
168                 break;
169         case MLX5_IB_INDEX(rp_time_reset):
170                 *attr_mask |= MLX5_IB_RP_TIME_RESET_ATTR;
171                 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
172                          rpg_time_reset, var);
173                 break;
174         case MLX5_IB_INDEX(rp_byte_reset):
175                 *attr_mask |= MLX5_IB_RP_BYTE_RESET_ATTR;
176                 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
177                          rpg_byte_reset, var);
178                 break;
179         case MLX5_IB_INDEX(rp_threshold):
180                 *attr_mask |= MLX5_IB_RP_THRESHOLD_ATTR;
181                 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
182                          rpg_threshold, var);
183                 break;
184         case MLX5_IB_INDEX(rp_ai_rate):
185                 *attr_mask |= MLX5_IB_RP_AI_RATE_ATTR;
186                 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
187                          rpg_ai_rate, var);
188                 break;
189         case MLX5_IB_INDEX(rp_hai_rate):
190                 *attr_mask |= MLX5_IB_RP_HAI_RATE_ATTR;
191                 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
192                          rpg_hai_rate, var);
193                 break;
194         case MLX5_IB_INDEX(rp_min_dec_fac):
195                 *attr_mask |= MLX5_IB_RP_MIN_DEC_FAC_ATTR;
196                 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
197                          rpg_min_dec_fac, var);
198                 break;
199         case MLX5_IB_INDEX(rp_min_rate):
200                 *attr_mask |= MLX5_IB_RP_MIN_RATE_ATTR;
201                 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
202                          rpg_min_rate, var);
203                 break;
204         case MLX5_IB_INDEX(rp_rate_to_set_on_first_cnp):
205                 *attr_mask |= MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR;
206                 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
207                          rate_to_set_on_first_cnp, var);
208                 break;
209         case MLX5_IB_INDEX(rp_dce_tcp_g):
210                 *attr_mask |= MLX5_IB_RP_DCE_TCP_G_ATTR;
211                 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
212                          dce_tcp_g, var);
213                 break;
214         case MLX5_IB_INDEX(rp_dce_tcp_rtt):
215                 *attr_mask |= MLX5_IB_RP_DCE_TCP_RTT_ATTR;
216                 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
217                          dce_tcp_rtt, var);
218                 break;
219         case MLX5_IB_INDEX(rp_rate_reduce_monitor_period):
220                 *attr_mask |= MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR;
221                 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
222                          rate_reduce_monitor_period, var);
223                 break;
224         case MLX5_IB_INDEX(rp_initial_alpha_value):
225                 *attr_mask |= MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR;
226                 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
227                          initial_alpha_value, var);
228                 break;
229         case MLX5_IB_INDEX(rp_gd):
230                 *attr_mask |= MLX5_IB_RP_GD_ATTR;
231                 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_rp, field,
232                          rpg_gd, var);
233                 break;
234         case MLX5_IB_INDEX(np_cnp_dscp):
235                 *attr_mask |= MLX5_IB_NP_CNP_DSCP_ATTR;
236                 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_np, field, cnp_dscp, var);
237                 break;
238         case MLX5_IB_INDEX(np_cnp_prio_mode):
239                 *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
240                 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_np, field, cnp_prio_mode, var);
241                 break;
242         case MLX5_IB_INDEX(np_cnp_prio):
243                 *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
244                 MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, 0);
245                 MLX5_IB_SET_CLIPPED(cong_control_r_roce_ecn_np, field, cnp_802p_prio, var);
246                 break;
247         default:
248                 break;
249         }
250 }
251
252 static int
253 mlx5_ib_get_all_cc_params(struct mlx5_ib_dev *dev)
254 {
255         int outlen = MLX5_ST_SZ_BYTES(query_cong_params_out);
256         enum mlx5_ib_cong_node_type node = 0;
257         void *out;
258         void *field;
259         u32 x;
260         int err = 0;
261
262         out = kzalloc(outlen, GFP_KERNEL);
263         if (!out)
264                 return -ENOMEM;
265
266         /* get the current values */
267         for (x = 0; x != MLX5_IB_CONG_PARAMS_NUM; x++) {
268                 if (node != mlx5_ib_param_to_node(x)) {
269                         node = mlx5_ib_param_to_node(x);
270
271                         err = mlx5_cmd_query_cong_params(dev->mdev, node, out, outlen);
272                         if (err)
273                                 break;
274                 }
275                 field = MLX5_ADDR_OF(query_cong_params_out, out, congestion_parameters);
276                 dev->congestion.arg[x] = mlx5_get_cc_param_val(field, x);
277         }
278         kfree(out);
279         return err;
280 }
281
282 static int
283 mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, u32 index, u64 var)
284 {
285         int inlen = MLX5_ST_SZ_BYTES(modify_cong_params_in);
286         enum mlx5_ib_cong_node_type node;
287         u32 attr_mask = 0;
288         void *field;
289         void *in;
290         int err;
291
292         in = kzalloc(inlen, GFP_KERNEL);
293         if (!in)
294                 return -ENOMEM;
295
296         MLX5_SET(modify_cong_params_in, in, opcode,
297                  MLX5_CMD_OP_MODIFY_CONG_PARAMS);
298
299         node = mlx5_ib_param_to_node(index);
300         MLX5_SET(modify_cong_params_in, in, cong_protocol, node);
301
302         field = MLX5_ADDR_OF(modify_cong_params_in, in, congestion_parameters);
303         mlx5_ib_set_cc_param_mask_val(field, index, var, &attr_mask);
304
305         field = MLX5_ADDR_OF(modify_cong_params_in, in, field_select);
306         MLX5_SET(field_select_r_roce_rp, field, field_select_r_roce_rp,
307                  attr_mask);
308
309         err = mlx5_cmd_modify_cong_params(dev->mdev, in, inlen);
310         kfree(in);
311
312         return err;
313 }
314
315 static int
316 mlx5_ib_cong_params_handler(SYSCTL_HANDLER_ARGS)
317 {
318         struct mlx5_ib_dev *dev = arg1;
319         u64 value;
320         int error;
321
322         CONG_LOCK(dev);
323         value = dev->congestion.arg[arg2];
324         if (req != NULL) {
325                 error = sysctl_handle_64(oidp, &value, 0, req);
326                 if (error || req->newptr == NULL ||
327                     value == dev->congestion.arg[arg2])
328                         goto done;
329
330                 /* assign new value */
331                 dev->congestion.arg[arg2] = value;
332         } else {
333                 error = 0;
334         }
335         if (!MLX5_CAP_GEN(dev->mdev, cc_modify_allowed))
336                 error = EPERM;
337         else {
338                 error = -mlx5_ib_set_cc_params(dev, MLX5_IB_INDEX(arg[arg2]),
339                     dev->congestion.arg[arg2]);
340         }
341 done:
342         CONG_UNLOCK(dev);
343
344         return (error);
345 }
346
/*
 * Assemble a 64-bit value from two separate fields of layout "t" at
 * "p": "f"_high supplies bits 63..32 and "f"_low supplies bits 31..0.
 */
#define MLX5_GET_UNALIGNED_64(t,p,f) \
    (((u64)MLX5_GET(t,p,f##_high) << 32) | MLX5_GET(t,p,f##_low))
349
350 static void
351 mlx5_ib_read_cong_stats(struct work_struct *work)
352 {
353         struct mlx5_ib_dev *dev =
354             container_of(work, struct mlx5_ib_dev, congestion.dwork.work);
355         const int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
356         void *out;
357
358         out = kzalloc(outlen, GFP_KERNEL);
359         if (!out)
360                 goto done;
361
362         CONG_LOCK(dev);
363         if (mlx5_cmd_query_cong_counter(dev->mdev, 0, out, outlen))
364                 memset(out, 0, outlen);
365
366         dev->congestion.syndrome =
367             MLX5_GET(query_cong_statistics_out, out, syndrome);
368         dev->congestion.rp_cur_flows =
369             MLX5_GET(query_cong_statistics_out, out, rp_cur_flows);
370         dev->congestion.sum_flows =
371             MLX5_GET(query_cong_statistics_out, out, sum_flows);
372         dev->congestion.rp_cnp_ignored =
373             MLX5_GET_UNALIGNED_64(query_cong_statistics_out, out, rp_cnp_ignored);
374         dev->congestion.rp_cnp_handled =
375             MLX5_GET_UNALIGNED_64(query_cong_statistics_out, out, rp_cnp_handled);
376         dev->congestion.time_stamp =
377             MLX5_GET_UNALIGNED_64(query_cong_statistics_out, out, time_stamp);
378         dev->congestion.accumulators_period =
379             MLX5_GET(query_cong_statistics_out, out, accumulators_period);
380         dev->congestion.np_ecn_marked_roce_packets =
381             MLX5_GET_UNALIGNED_64(query_cong_statistics_out, out, np_ecn_marked_roce_packets);
382         dev->congestion.np_cnp_sent =
383             MLX5_GET_UNALIGNED_64(query_cong_statistics_out, out, np_cnp_sent);
384
385         CONG_UNLOCK(dev);
386         kfree(out);
387
388 done:
389         schedule_delayed_work(&dev->congestion.dwork, hz);
390 }
391
392 void
393 mlx5_ib_cleanup_congestion(struct mlx5_ib_dev *dev)
394 {
395
396         while (cancel_delayed_work_sync(&dev->congestion.dwork))
397                 ;
398         sysctl_ctx_free(&dev->congestion.ctx);
399         sx_destroy(&dev->congestion.lock);
400 }
401
402 int
403 mlx5_ib_init_congestion(struct mlx5_ib_dev *dev)
404 {
405         struct sysctl_ctx_list *ctx;
406         struct sysctl_oid *parent;
407         struct sysctl_oid *node;
408         int err;
409         u32 x;
410
411         ctx = &dev->congestion.ctx;
412         sysctl_ctx_init(ctx);
413         sx_init(&dev->congestion.lock, "mlx5ibcong");
414         INIT_DELAYED_WORK(&dev->congestion.dwork, mlx5_ib_read_cong_stats);
415
416         if (!MLX5_CAP_GEN(dev->mdev, cc_query_allowed))
417                 return (0);
418
419         err = mlx5_ib_get_all_cc_params(dev);
420         if (err)
421                 return (err);
422
423         parent = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(dev->ib_dev.dev.kobj.oidp),
424             OID_AUTO, "cong", CTLFLAG_RW, NULL, "Congestion control");
425         if (parent == NULL)
426                 return (-ENOMEM);
427
428         node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(parent),
429             OID_AUTO, "conf", CTLFLAG_RW, NULL, "Configuration");
430         if (node == NULL) {
431                 sysctl_ctx_free(&dev->congestion.ctx);
432                 return (-ENOMEM);
433         }
434
435         for (x = 0; x != MLX5_IB_CONG_PARAMS_NUM; x++) {
436                 SYSCTL_ADD_PROC(ctx,
437                     SYSCTL_CHILDREN(node), OID_AUTO,
438                     mlx5_ib_cong_params_desc[2 * x],
439                     CTLTYPE_U64 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
440                     dev, x, &mlx5_ib_cong_params_handler, "QU",
441                     mlx5_ib_cong_params_desc[2 * x + 1]);
442         }
443
444         node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(parent),
445             OID_AUTO, "stats", CTLFLAG_RD, NULL, "Statistics");
446         if (node == NULL) {
447                 sysctl_ctx_free(&dev->congestion.ctx);
448                 return (-ENOMEM);
449         }
450
451         for (x = 0; x != MLX5_IB_CONG_STATS_NUM; x++) {
452                 /* read-only SYSCTLs */
453                 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
454                     mlx5_ib_cong_stats_desc[2 * x],
455                     CTLFLAG_RD | CTLFLAG_MPSAFE,
456                     &dev->congestion.arg[x + MLX5_IB_CONG_PARAMS_NUM],
457                     0, mlx5_ib_cong_stats_desc[2 * x + 1]);
458         }
459         schedule_delayed_work(&dev->congestion.dwork, hz);
460         return (0);
461 }