2 * SPDX-License-Identifier: BSD-2-Clause
4 * Copyright (c) 2015-2021 Amazon.com, Inc. or its affiliates.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 #include <sys/cdefs.h>
31 #include <sys/param.h>
35 #include "ena_sysctl.h"
/*
 * Builders for the individual branches of the per-device sysctl tree.
 */
static void	ena_sysctl_add_wd(struct ena_adapter *adapter);
static void	ena_sysctl_add_stats(struct ena_adapter *adapter);
static void	ena_sysctl_add_eni_metrics(struct ena_adapter *adapter);
static void	ena_sysctl_add_tuneables(struct ena_adapter *adapter);
/* Kernel option RSS prevents manipulation of key hash and indirection table. */
#ifndef RSS
static void	ena_sysctl_add_rss(struct ena_adapter *adapter);
#endif

/*
 * Handlers attached to the writable OIDs with SYSCTL_ADD_PROC().
 */
static int	ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS);
static int	ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS);
static int	ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS);
static int	ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS);
#ifndef RSS
static int	ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS);
static int	ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS);
#endif
54 /* Limit max ENI sample rate to be an hour. */
55 #define ENI_METRICS_MAX_SAMPLE_INTERVAL 3600
56 #define ENA_HASH_KEY_MSG_SIZE (ENA_HASH_KEY_SIZE * 2 + 1)
58 static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
59 "ENA driver parameters");
62 * Logging level for changing verbosity of the output
64 int ena_log_level = ENA_INFO;
65 SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN, &ena_log_level, 0,
66 "Logging level indicating verbosity of the logs");
68 SYSCTL_CONST_STRING(_hw_ena, OID_AUTO, driver_version, CTLFLAG_RD,
69 ENA_DRV_MODULE_VERSION, "ENA driver version");
72 * Use 9k mbufs for the Rx buffers. Default to 0 (use page size mbufs instead).
73 * Using 9k mbufs in low memory conditions might cause allocation to take a lot
74 * of time and lead to the OS instability as it needs to look for the contiguous
76 * However, page size mbufs has a bit smaller throughput than 9k mbufs, so if
77 * the network performance is the priority, the 9k mbufs can be used.
79 int ena_enable_9k_mbufs = 0;
80 SYSCTL_INT(_hw_ena, OID_AUTO, enable_9k_mbufs, CTLFLAG_RDTUN,
81 &ena_enable_9k_mbufs, 0, "Use 9 kB mbufs for Rx descriptors");
84 * Force the driver to use large LLQ (Low Latency Queue) header. Defaults to
85 * false. This option may be important for platforms, which often handle packet
86 * headers on Tx with total header size greater than 96B, as it may
88 * It also reduces the maximum Tx queue size by half, so it may cause more Tx
91 bool ena_force_large_llq_header = false;
92 SYSCTL_BOOL(_hw_ena, OID_AUTO, force_large_llq_header, CTLFLAG_RDTUN,
93 &ena_force_large_llq_header, 0,
94 "Increases maximum supported header size in LLQ mode to 224 bytes, while reducing the maximum Tx queue size by half.\n");
96 int ena_rss_table_size = ENA_RX_RSS_TABLE_SIZE;
/*
 * ena_sysctl_add_nodes - Build the complete sysctl tree of one adapter.
 *
 * Adds, in this order, the watchdog knobs, the statistics, the ENI metrics,
 * the tuneables and - unless the kernel is built with option RSS - the RSS
 * controls.
 */
void
ena_sysctl_add_nodes(struct ena_adapter *adapter)
{
	ena_sysctl_add_wd(adapter);
	ena_sysctl_add_stats(adapter);
	ena_sysctl_add_eni_metrics(adapter);
	ena_sysctl_add_tuneables(adapter);
#ifndef RSS
	ena_sysctl_add_rss(adapter);
#endif
}
111 ena_sysctl_add_wd(struct ena_adapter *adapter)
115 struct sysctl_ctx_list *ctx;
116 struct sysctl_oid *tree;
117 struct sysctl_oid_list *child;
121 ctx = device_get_sysctl_ctx(dev);
122 tree = device_get_sysctl_tree(dev);
123 child = SYSCTL_CHILDREN(tree);
125 /* Sysctl calls for Watchdog service */
126 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "wd_active", CTLFLAG_RWTUN,
127 &adapter->wd_active, 0, "Watchdog is active");
129 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "keep_alive_timeout",
130 CTLFLAG_RWTUN, &adapter->keep_alive_timeout,
131 "Timeout for Keep Alive messages");
133 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "missing_tx_timeout",
134 CTLFLAG_RWTUN, &adapter->missing_tx_timeout,
135 "Timeout for TX completion");
137 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_max_queues",
138 CTLFLAG_RWTUN, &adapter->missing_tx_max_queues, 0,
139 "Number of TX queues to check per run");
141 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_threshold",
142 CTLFLAG_RWTUN, &adapter->missing_tx_threshold, 0,
143 "Max number of timeouted packets");
147 ena_sysctl_add_stats(struct ena_adapter *adapter)
151 struct ena_ring *tx_ring;
152 struct ena_ring *rx_ring;
154 struct ena_hw_stats *hw_stats;
155 struct ena_stats_dev *dev_stats;
156 struct ena_stats_tx *tx_stats;
157 struct ena_stats_rx *rx_stats;
158 struct ena_com_stats_admin *admin_stats;
160 struct sysctl_ctx_list *ctx;
161 struct sysctl_oid *tree;
162 struct sysctl_oid_list *child;
164 struct sysctl_oid *queue_node, *tx_node, *rx_node, *hw_node;
165 struct sysctl_oid *admin_node;
166 struct sysctl_oid_list *queue_list, *tx_list, *rx_list, *hw_list;
167 struct sysctl_oid_list *admin_list;
169 #define QUEUE_NAME_LEN 32
170 char namebuf[QUEUE_NAME_LEN];
175 ctx = device_get_sysctl_ctx(dev);
176 tree = device_get_sysctl_tree(dev);
177 child = SYSCTL_CHILDREN(tree);
179 tx_ring = adapter->tx_ring;
180 rx_ring = adapter->rx_ring;
182 hw_stats = &adapter->hw_stats;
183 dev_stats = &adapter->dev_stats;
184 admin_stats = &adapter->ena_dev->admin_queue.stats;
186 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "wd_expired", CTLFLAG_RD,
187 &dev_stats->wd_expired, "Watchdog expiry count");
188 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_up", CTLFLAG_RD,
189 &dev_stats->interface_up, "Network interface up count");
190 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_down",
191 CTLFLAG_RD, &dev_stats->interface_down,
192 "Network interface down count");
193 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_q_pause",
194 CTLFLAG_RD, &dev_stats->admin_q_pause, "Admin queue pauses");
196 for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring, ++rx_ring) {
197 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
199 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
200 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name");
201 queue_list = SYSCTL_CHILDREN(queue_node);
203 adapter->que[i].oid = queue_node;
207 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "cpu", CTLFLAG_RD,
208 &adapter->que[i].cpu, 0, "CPU affinity");
209 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "domain", CTLFLAG_RD,
210 &adapter->que[i].domain, 0, "NUMA domain");
213 /* TX specific stats */
214 tx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "tx_ring",
215 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX ring");
216 tx_list = SYSCTL_CHILDREN(tx_node);
218 tx_stats = &tx_ring->tx_stats;
220 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "count",
221 CTLFLAG_RD, &tx_stats->cnt, "Packets sent");
222 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bytes",
223 CTLFLAG_RD, &tx_stats->bytes, "Bytes sent");
224 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
225 "prepare_ctx_err", CTLFLAG_RD, &tx_stats->prepare_ctx_err,
226 "TX buffer preparation failures");
227 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
228 "dma_mapping_err", CTLFLAG_RD, &tx_stats->dma_mapping_err,
229 "DMA mapping failures");
230 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "doorbells",
231 CTLFLAG_RD, &tx_stats->doorbells, "Queue doorbells");
232 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
233 "missing_tx_comp", CTLFLAG_RD, &tx_stats->missing_tx_comp,
234 "TX completions missed");
235 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bad_req_id",
236 CTLFLAG_RD, &tx_stats->bad_req_id, "Bad request id count");
237 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "mbuf_collapses",
238 CTLFLAG_RD, &tx_stats->collapse, "Mbuf collapse count");
239 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
240 "mbuf_collapse_err", CTLFLAG_RD, &tx_stats->collapse_err,
241 "Mbuf collapse failures");
242 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "queue_wakeups",
243 CTLFLAG_RD, &tx_stats->queue_wakeup, "Queue wakeups");
244 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "queue_stops",
245 CTLFLAG_RD, &tx_stats->queue_stop, "Queue stops");
246 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
247 "llq_buffer_copy", CTLFLAG_RD, &tx_stats->llq_buffer_copy,
248 "Header copies for llq transaction");
249 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
250 "unmask_interrupt_num", CTLFLAG_RD,
251 &tx_stats->unmask_interrupt_num,
252 "Unmasked interrupt count");
254 /* RX specific stats */
255 rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "rx_ring",
256 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX ring");
257 rx_list = SYSCTL_CHILDREN(rx_node);
259 rx_stats = &rx_ring->rx_stats;
261 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "count",
262 CTLFLAG_RD, &rx_stats->cnt, "Packets received");
263 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bytes",
264 CTLFLAG_RD, &rx_stats->bytes, "Bytes received");
265 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "refil_partial",
266 CTLFLAG_RD, &rx_stats->refil_partial,
267 "Partial refilled mbufs");
268 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "csum_bad",
269 CTLFLAG_RD, &rx_stats->csum_bad, "Bad RX checksum");
270 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
271 "mbuf_alloc_fail", CTLFLAG_RD, &rx_stats->mbuf_alloc_fail,
272 "Failed mbuf allocs");
273 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
274 "mjum_alloc_fail", CTLFLAG_RD, &rx_stats->mjum_alloc_fail,
275 "Failed jumbo mbuf allocs");
276 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
277 "dma_mapping_err", CTLFLAG_RD, &rx_stats->dma_mapping_err,
278 "DMA mapping errors");
279 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_desc_num",
280 CTLFLAG_RD, &rx_stats->bad_desc_num,
281 "Bad descriptor count");
282 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_req_id",
283 CTLFLAG_RD, &rx_stats->bad_req_id, "Bad request id count");
284 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "empty_rx_ring",
285 CTLFLAG_RD, &rx_stats->empty_rx_ring,
286 "RX descriptors depletion count");
287 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "csum_good",
288 CTLFLAG_RD, &rx_stats->csum_good,
289 "Valid RX checksum calculations");
292 /* Stats read from device */
293 hw_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hw_stats",
294 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics from hardware");
295 hw_list = SYSCTL_CHILDREN(hw_node);
297 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_packets", CTLFLAG_RD,
298 &hw_stats->rx_packets, "Packets received");
299 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_packets", CTLFLAG_RD,
300 &hw_stats->tx_packets, "Packets transmitted");
301 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_bytes", CTLFLAG_RD,
302 &hw_stats->rx_bytes, "Bytes received");
303 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_bytes", CTLFLAG_RD,
304 &hw_stats->tx_bytes, "Bytes transmitted");
305 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_drops", CTLFLAG_RD,
306 &hw_stats->rx_drops, "Receive packet drops");
307 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_drops", CTLFLAG_RD,
308 &hw_stats->tx_drops, "Transmit packet drops");
310 /* ENA Admin queue stats */
311 admin_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "admin_stats",
312 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA Admin Queue statistics");
313 admin_list = SYSCTL_CHILDREN(admin_node);
315 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "aborted_cmd", CTLFLAG_RD,
316 &admin_stats->aborted_cmd, 0, "Aborted commands");
317 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "sumbitted_cmd", CTLFLAG_RD,
318 &admin_stats->submitted_cmd, 0, "Submitted commands");
319 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "completed_cmd", CTLFLAG_RD,
320 &admin_stats->completed_cmd, 0, "Completed commands");
321 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "out_of_space", CTLFLAG_RD,
322 &admin_stats->out_of_space, 0, "Queue out of space");
323 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "no_completion", CTLFLAG_RD,
324 &admin_stats->no_completion, 0, "Commands not completed");
328 ena_sysctl_add_eni_metrics(struct ena_adapter *adapter)
331 struct ena_admin_eni_stats *eni_metrics;
333 struct sysctl_ctx_list *ctx;
334 struct sysctl_oid *tree;
335 struct sysctl_oid_list *child;
337 struct sysctl_oid *eni_node;
338 struct sysctl_oid_list *eni_list;
342 ctx = device_get_sysctl_ctx(dev);
343 tree = device_get_sysctl_tree(dev);
344 child = SYSCTL_CHILDREN(tree);
346 eni_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eni_metrics",
347 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's ENI metrics");
348 eni_list = SYSCTL_CHILDREN(eni_node);
350 eni_metrics = &adapter->eni_metrics;
352 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_in_allowance_exceeded",
353 CTLFLAG_RD, &eni_metrics->bw_in_allowance_exceeded, 0,
354 "Inbound BW allowance exceeded");
355 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_out_allowance_exceeded",
356 CTLFLAG_RD, &eni_metrics->bw_out_allowance_exceeded, 0,
357 "Outbound BW allowance exceeded");
358 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "pps_allowance_exceeded",
359 CTLFLAG_RD, &eni_metrics->pps_allowance_exceeded, 0,
360 "PPS allowance exceeded");
361 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "conntrack_allowance_exceeded",
362 CTLFLAG_RD, &eni_metrics->conntrack_allowance_exceeded, 0,
363 "Connection tracking allowance exceeded");
364 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "linklocal_allowance_exceeded",
365 CTLFLAG_RD, &eni_metrics->linklocal_allowance_exceeded, 0,
366 "Linklocal packet rate allowance exceeded");
369 * Tuneable, which determines how often ENI metrics will be read.
370 * 0 means it's turned off. Maximum allowed value is limited by:
371 * ENI_METRICS_MAX_SAMPLE_INTERVAL.
373 SYSCTL_ADD_PROC(ctx, eni_list, OID_AUTO, "sample_interval",
374 CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
375 ena_sysctl_eni_metrics_interval, "SU",
376 "Interval in seconds for updating ENI emetrics. 0 turns off the update.");
380 ena_sysctl_add_tuneables(struct ena_adapter *adapter)
384 struct sysctl_ctx_list *ctx;
385 struct sysctl_oid *tree;
386 struct sysctl_oid_list *child;
390 ctx = device_get_sysctl_ctx(dev);
391 tree = device_get_sysctl_tree(dev);
392 child = SYSCTL_CHILDREN(tree);
394 /* Tuneable number of buffers in the buf-ring (drbr) */
395 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "buf_ring_size",
396 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
397 ena_sysctl_buf_ring_size, "I",
398 "Size of the Tx buffer ring (drbr).");
400 /* Tuneable number of the Rx ring size */
401 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_queue_size",
402 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
403 ena_sysctl_rx_queue_size, "I",
404 "Size of the Rx ring. The size should be a power of 2.");
406 /* Tuneable number of IO queues */
407 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "io_queues_nb",
408 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
409 ena_sysctl_io_queues_nb, "I", "Number of IO queues.");
412 /* Kernel option RSS prevents manipulation of key hash and indirection table. */
415 ena_sysctl_add_rss(struct ena_adapter *adapter)
419 struct sysctl_ctx_list *ctx;
420 struct sysctl_oid *tree;
421 struct sysctl_oid_list *child;
425 ctx = device_get_sysctl_ctx(dev);
426 tree = device_get_sysctl_tree(dev);
427 child = SYSCTL_CHILDREN(tree);
430 tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rss",
431 CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Receive Side Scaling options.");
432 child = SYSCTL_CHILDREN(tree);
435 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "key",
436 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
437 ena_sysctl_rss_key, "A", "RSS key.");
439 /* Tuneable RSS indirection table */
440 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "indir_table",
441 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
442 ena_sysctl_rss_indir_table, "A", "RSS indirection table.");
444 /* RSS indirection table size */
445 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "indir_table_size",
446 CTLFLAG_RD | CTLFLAG_MPSAFE, &ena_rss_table_size, 0,
447 "RSS indirection table size.");
453 * ena_sysctl_update_queue_node_nb - Register/unregister sysctl queue nodes.
455 * Whether the nodes are registered or unregistered depends on a delta between
456 * the `old` and `new` parameters, representing the number of queues.
458 * This function is used to hide sysctl attributes for queue nodes which aren't
459 * currently used by the HW (e.g. after a call to `ena_sysctl_io_queues_nb`).
462 * All unregistered nodes must be registered again at detach, i.e. by a call to
466 ena_sysctl_update_queue_node_nb(struct ena_adapter *adapter, int old, int new)
468 struct sysctl_oid *oid;
472 max = MIN(MAX(old, new), adapter->max_num_io_queues);
474 for (i = min; i < max; ++i) {
475 oid = adapter->que[i].oid;
479 sysctl_unregister_oid(oid);
481 sysctl_register_oid(oid);
487 ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS)
489 struct ena_adapter *adapter = arg1;
494 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
500 error = sysctl_wire_old_buffer(req, sizeof(val));
502 val = adapter->buf_ring_size;
503 error = sysctl_handle_32(oidp, &val, 0, req);
505 if (error != 0 || req->newptr == NULL)
508 if (!powerof2(val) || val == 0) {
509 ena_log(adapter->pdev, ERR,
510 "Requested new Tx buffer ring size (%u) is not a power of 2\n",
516 if (val != adapter->buf_ring_size) {
517 ena_log(adapter->pdev, INFO,
518 "Requested new Tx buffer ring size: %d. Old size: %d\n",
519 val, adapter->buf_ring_size);
521 error = ena_update_buf_ring_size(adapter, val);
523 ena_log(adapter->pdev, ERR,
524 "New Tx buffer ring size is the same as already used: %u\n",
525 adapter->buf_ring_size);
535 ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS)
537 struct ena_adapter *adapter = arg1;
542 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
548 error = sysctl_wire_old_buffer(req, sizeof(val));
550 val = adapter->requested_rx_ring_size;
551 error = sysctl_handle_32(oidp, &val, 0, req);
553 if (error != 0 || req->newptr == NULL)
556 if (val < ENA_MIN_RING_SIZE || val > adapter->max_rx_ring_size) {
557 ena_log(adapter->pdev, ERR,
558 "Requested new Rx queue size (%u) is out of range: [%u, %u]\n",
559 val, ENA_MIN_RING_SIZE, adapter->max_rx_ring_size);
564 /* Check if the parameter is power of 2 */
565 if (!powerof2(val)) {
566 ena_log(adapter->pdev, ERR,
567 "Requested new Rx queue size (%u) is not a power of 2\n",
573 if (val != adapter->requested_rx_ring_size) {
574 ena_log(adapter->pdev, INFO,
575 "Requested new Rx queue size: %u. Old size: %u\n", val,
576 adapter->requested_rx_ring_size);
578 error = ena_update_queue_size(adapter,
579 adapter->requested_tx_ring_size, val);
581 ena_log(adapter->pdev, ERR,
582 "New Rx queue size is the same as already used: %u\n",
583 adapter->requested_rx_ring_size);
593 * Change number of effectively used IO queues adapter->num_io_queues
596 ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS)
598 struct ena_adapter *adapter = arg1;
599 uint32_t old_num_queues, tmp = 0;
603 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
608 error = sysctl_wire_old_buffer(req, sizeof(tmp));
610 tmp = adapter->num_io_queues;
611 error = sysctl_handle_int(oidp, &tmp, 0, req);
613 if (error != 0 || req->newptr == NULL)
617 ena_log(adapter->pdev, ERR,
618 "Requested number of IO queues is zero\n");
624 * The adapter::max_num_io_queues is the HW capability. The system
625 * resources availability may potentially be a tighter limit. Therefore
626 * the relation `adapter::max_num_io_queues >= adapter::msix_vecs`
627 * always holds true, while the `adapter::msix_vecs` is variable across
628 * device reset (`ena_destroy_device()` + `ena_restore_device()`).
630 if (tmp > (adapter->msix_vecs - ENA_ADMIN_MSIX_VEC)) {
631 ena_log(adapter->pdev, ERR,
632 "Requested number of IO queues is higher than maximum allowed (%u)\n",
633 adapter->msix_vecs - ENA_ADMIN_MSIX_VEC);
637 if (tmp == adapter->num_io_queues) {
638 ena_log(adapter->pdev, ERR,
639 "Requested number of IO queues is equal to current value "
641 adapter->num_io_queues);
643 ena_log(adapter->pdev, INFO,
644 "Requested new number of IO queues: %u, current value: "
646 tmp, adapter->num_io_queues);
648 old_num_queues = adapter->num_io_queues;
649 error = ena_update_io_queue_nb(adapter, tmp);
653 ena_sysctl_update_queue_node_nb(adapter, old_num_queues, tmp);
663 ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS)
665 struct ena_adapter *adapter = arg1;
670 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
675 error = sysctl_wire_old_buffer(req, sizeof(interval));
677 interval = adapter->eni_metrics_sample_interval;
678 error = sysctl_handle_16(oidp, &interval, 0, req);
680 if (error != 0 || req->newptr == NULL)
683 if (interval > ENI_METRICS_MAX_SAMPLE_INTERVAL) {
684 ena_log(adapter->pdev, ERR,
685 "ENI metrics update interval is out of range - maximum allowed value: %d seconds\n",
686 ENI_METRICS_MAX_SAMPLE_INTERVAL);
692 ena_log(adapter->pdev, INFO,
693 "ENI metrics update is now turned off\n");
694 bzero(&adapter->eni_metrics, sizeof(adapter->eni_metrics));
696 ena_log(adapter->pdev, INFO,
697 "ENI metrics update interval is set to: %" PRIu16
702 adapter->eni_metrics_sample_interval = interval;
712 * Change the Receive Side Scaling hash key.
715 ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS)
717 struct ena_adapter *adapter = arg1;
718 struct ena_com_dev *ena_dev = adapter->ena_dev;
719 enum ena_admin_hash_functions ena_func;
720 char msg[ENA_HASH_KEY_MSG_SIZE];
721 char elem[3] = { 0 };
723 u8 rss_key[ENA_HASH_KEY_SIZE];
727 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
732 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
737 error = sysctl_wire_old_buffer(req, sizeof(msg));
741 error = ena_com_get_hash_function(adapter->ena_dev, &ena_func);
743 device_printf(adapter->pdev, "Cannot get hash function\n");
747 if (ena_func != ENA_ADMIN_TOEPLITZ) {
749 device_printf(adapter->pdev, "Unsupported hash algorithm\n");
753 error = ena_rss_get_hash_key(ena_dev, rss_key);
755 device_printf(adapter->pdev, "Cannot get hash key\n");
759 for (i = 0; i < ENA_HASH_KEY_SIZE; ++i)
760 snprintf(&msg[i * 2], 3, "%02x", rss_key[i]);
762 error = sysctl_handle_string(oidp, msg, sizeof(msg), req);
763 if (error != 0 || req->newptr == NULL)
766 if (strlen(msg) != sizeof(msg) - 1) {
768 device_printf(adapter->pdev, "Invalid key size\n");
772 for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) {
773 strncpy(elem, &msg[i * 2], 2);
774 rss_key[i] = strtol(elem, &endp, 16);
776 /* Both hex nibbles in the string must be valid to continue. */
777 if (endp == elem || *endp != '\0' || rss_key[i] < 0) {
779 device_printf(adapter->pdev,
780 "Invalid key hex value: '%c'\n", *endp);
785 error = ena_rss_set_hash(ena_dev, rss_key);
787 device_printf(adapter->pdev, "Cannot fill hash key\n");
796 * Change the Receive Side Scaling indirection table.
798 * The sysctl entry string consists of one or more `x:y` keypairs, where
799 * x stands for the table index and y for its new value.
800 * Table indices that don't need to be updated can be omitted from the string
801 * and will retain their existing values. If an index is entered more than once,
802 * the last value is used.
805 * To update two selected indices in the RSS indirection table, e.g. setting
806 * index 0 to queue 5 and then index 5 to queue 0, the below command should be
808 * sysctl dev.ena.0.rss.indir_table="0:5 5:0"
811 ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS)
813 int num_queues, error;
814 struct ena_adapter *adapter = arg1;
815 struct ena_indir *indir;
816 char *msg, *buf, *endp;
820 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
825 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
830 indir = adapter->rss_indir;
831 msg = indir->sysctl_buf;
833 if (unlikely(indir == NULL)) {
838 error = sysctl_handle_string(oidp, msg, sizeof(indir->sysctl_buf), req);
839 if (error != 0 || req->newptr == NULL)
842 num_queues = adapter->num_io_queues;
845 * This sysctl expects msg to be a list of `x:y` record pairs,
846 * where x is the indirection table index and y is its value.
848 for (buf = msg; *buf != '\0'; buf = endp) {
849 idx = strtol(buf, &endp, 10);
851 if (endp == buf || idx < 0) {
852 device_printf(adapter->pdev, "Invalid index: %s\n",
858 if (idx >= ENA_RX_RSS_TABLE_SIZE) {
859 device_printf(adapter->pdev, "Index %d out of range\n",
868 device_printf(adapter->pdev, "Missing ':' separator\n");
873 value = strtol(buf, &endp, 10);
875 if (endp == buf || value < 0) {
876 device_printf(adapter->pdev, "Invalid value: %s\n",
882 if (value >= num_queues) {
883 device_printf(adapter->pdev, "Value %d out of range\n",
889 indir->table[idx] = value;
892 if (error != 0) /* Reload indirection table with last good data. */
893 ena_rss_indir_get(adapter, indir->table);
895 /* At this point msg has been clobbered by sysctl_handle_string. */
896 ena_rss_copy_indir_buf(msg, indir->table);
899 error = ena_rss_indir_set(adapter, indir->table);