4 * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
33 #include "ena_sysctl.h"
/*
 * Forward declarations of the file-local helpers that register sysctl
 * entries for an adapter, and of the three handler procedures
 * (SYSCTL_HANDLER_ARGS signature) used by the procedure-backed OIDs.
 */
35 static void ena_sysctl_add_wd(struct ena_adapter *);
36 static void ena_sysctl_add_stats(struct ena_adapter *);
37 static void ena_sysctl_add_tuneables(struct ena_adapter *);
38 static int ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS);
39 static int ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS);
40 static int ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS);
/* Declares the read-only "ena" node under the _hw parent. */
42 static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
43 "ENA driver parameters");
46 * Logging level for changing verbosity of the output
/*
 * Global logging level, initialised to the bitwise OR of ENA_ALERT and
 * ENA_WARNING and registered under _hw_ena as "log_level" with
 * CTLFLAG_RWTUN.
 */
48 int ena_log_level = ENA_ALERT | ENA_WARNING;
49 SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN,
50 &ena_log_level, 0, "Logging level indicating verbosity of the logs");
/* Publishes DRV_MODULE_VERSION as the read-only "driver_version" string. */
52 SYSCTL_CONST_STRING(_hw_ena, OID_AUTO, driver_version, CTLFLAG_RD,
53 DRV_MODULE_VERSION, "ENA driver version");
56 * Use 9k mbufs for the Rx buffers. Default to 0 (use page size mbufs instead).
57 * Using 9k mbufs in low memory conditions might cause allocation to take a lot
58 * of time and lead to the OS instability as it needs to look for the contiguous
59 * pages.
60 * However, page size mbufs have a bit smaller throughput than 9k mbufs, so if
61 * the network performance is the priority, the 9k mbufs can be used.
/*
 * Global switch for 9 kB Rx mbufs; initialised to 0 and registered under
 * _hw_ena as "enable_9k_mbufs" with CTLFLAG_RDTUN.
 */
63 int ena_enable_9k_mbufs = 0;
64 SYSCTL_INT(_hw_ena, OID_AUTO, enable_9k_mbufs, CTLFLAG_RDTUN,
65 &ena_enable_9k_mbufs, 0, "Use 9 kB mbufs for Rx descriptors");
/*
 * Entry point for registering the adapter's sysctl entries: calls the
 * watchdog, statistics and tunables helpers, in that order, on the same
 * adapter.
 * NOTE(review): the return type and braces are not visible in this excerpt.
 */
68 ena_sysctl_add_nodes(struct ena_adapter *adapter)
70 ena_sysctl_add_wd(adapter);
71 ena_sysctl_add_stats(adapter);
72 ena_sysctl_add_tuneables(adapter);
/*
 * Registers the watchdog-related OIDs as children of the device's sysctl
 * tree. Each OID is created with CTLFLAG_RWTUN and points directly at the
 * corresponding field of the adapter structure:
 *   wd_active               (int)
 *   keep_alive_timeout      (quad)
 *   missing_tx_timeout      (quad)
 *   missing_tx_max_queues   (u32)
 *   missing_tx_threshold    (u32)
 * NOTE(review): the declaration and assignment of `dev` are not visible in
 * this excerpt; presumably it is the adapter's device handle - confirm.
 */
76 ena_sysctl_add_wd(struct ena_adapter *adapter)
80 struct sysctl_ctx_list *ctx;
81 struct sysctl_oid *tree;
82 struct sysctl_oid_list *child;
/* Resolve the per-device sysctl context and the list new OIDs attach to. */
86 ctx = device_get_sysctl_ctx(dev);
87 tree = device_get_sysctl_tree(dev);
88 child = SYSCTL_CHILDREN(tree);
90 /* Sysctl calls for Watchdog service */
91 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "wd_active",
92 CTLFLAG_RWTUN, &adapter->wd_active, 0,
93 "Watchdog is active");
95 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "keep_alive_timeout",
96 CTLFLAG_RWTUN, &adapter->keep_alive_timeout,
97 "Timeout for Keep Alive messages");
99 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "missing_tx_timeout",
100 CTLFLAG_RWTUN, &adapter->missing_tx_timeout,
101 "Timeout for TX completion");
103 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_max_queues",
104 CTLFLAG_RWTUN, &adapter->missing_tx_max_queues, 0,
105 "Number of TX queues to check per run");
107 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_threshold",
108 CTLFLAG_RWTUN, &adapter->missing_tx_threshold, 0,
109 "Max number of timeouted packets");
/*
 * Registers the read-only statistics OIDs under the device's sysctl tree:
 *   - adapter-wide counters from adapter->dev_stats;
 *   - for each of adapter->num_io_queues queues, a "queue<i>" node holding
 *     a "tx_ring" and an "rx_ring" sub-node with that ring's counters;
 *   - a "hw_stats" node backed by adapter->hw_stats;
 *   - an "admin_stats" node backed by the admin queue statistics of
 *     adapter->ena_dev.
 * NOTE(review): the declarations of `dev` and `i`, and the name/flag
 * arguments of a few SYSCTL_ADD_COUNTER_U64 calls, are not visible in this
 * excerpt.
 */
113 ena_sysctl_add_stats(struct ena_adapter *adapter)
117 struct ena_ring *tx_ring;
118 struct ena_ring *rx_ring;
120 struct ena_hw_stats *hw_stats;
121 struct ena_stats_dev *dev_stats;
122 struct ena_stats_tx *tx_stats;
123 struct ena_stats_rx *rx_stats;
124 struct ena_com_stats_admin *admin_stats;
126 struct sysctl_ctx_list *ctx;
127 struct sysctl_oid *tree;
128 struct sysctl_oid_list *child;
130 struct sysctl_oid *queue_node, *tx_node, *rx_node, *hw_node;
131 struct sysctl_oid *admin_node;
132 struct sysctl_oid_list *queue_list, *tx_list, *rx_list, *hw_list;
133 struct sysctl_oid_list *admin_list;
/* Scratch buffer for the per-queue node name formatted by snprintf() below. */
135 #define QUEUE_NAME_LEN 32
136 char namebuf[QUEUE_NAME_LEN];
141 ctx = device_get_sysctl_ctx(dev);
142 tree = device_get_sysctl_tree(dev);
143 child = SYSCTL_CHILDREN(tree);
/* Start at the first Tx/Rx ring; both pointers are advanced by the loop. */
145 tx_ring = adapter->tx_ring;
146 rx_ring = adapter->rx_ring;
148 hw_stats = &adapter->hw_stats;
149 dev_stats = &adapter->dev_stats;
150 admin_stats = &adapter->ena_dev->admin_queue.stats;
/* Adapter-wide counters taken from adapter->dev_stats. */
152 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "wd_expired",
153 CTLFLAG_RD, &dev_stats->wd_expired,
154 "Watchdog expiry count");
155 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_up",
156 CTLFLAG_RD, &dev_stats->interface_up,
157 "Network interface up count");
158 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_down",
159 CTLFLAG_RD, &dev_stats->interface_down,
160 "Network interface down count");
161 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_q_pause",
162 CTLFLAG_RD, &dev_stats->admin_q_pause,
163 "Admin queue pauses");
/* One node per IO queue; tx_ring and rx_ring step in lockstep with i. */
165 for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring, ++rx_ring) {
166 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
168 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO,
169 namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name");
170 queue_list = SYSCTL_CHILDREN(queue_node);
172 /* TX specific stats */
173 tx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO,
174 "tx_ring", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX ring");
175 tx_list = SYSCTL_CHILDREN(tx_node);
177 tx_stats = &tx_ring->tx_stats;
179 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
181 &tx_stats->cnt, "Packets sent");
182 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
184 &tx_stats->bytes, "Bytes sent");
185 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
186 "prepare_ctx_err", CTLFLAG_RD,
187 &tx_stats->prepare_ctx_err,
188 "TX buffer preparation failures");
189 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
190 "dma_mapping_err", CTLFLAG_RD,
191 &tx_stats->dma_mapping_err, "DMA mapping failures");
192 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
193 "doorbells", CTLFLAG_RD,
194 &tx_stats->doorbells, "Queue doorbells");
195 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
196 "missing_tx_comp", CTLFLAG_RD,
197 &tx_stats->missing_tx_comp, "TX completions missed");
198 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
199 "bad_req_id", CTLFLAG_RD,
200 &tx_stats->bad_req_id, "Bad request id count");
201 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
202 "mbuf_collapses", CTLFLAG_RD,
204 "Mbuf collapse count");
205 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
206 "mbuf_collapse_err", CTLFLAG_RD,
207 &tx_stats->collapse_err,
208 "Mbuf collapse failures");
209 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
210 "queue_wakeups", CTLFLAG_RD,
211 &tx_stats->queue_wakeup, "Queue wakeups");
212 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
213 "queue_stops", CTLFLAG_RD,
214 &tx_stats->queue_stop, "Queue stops");
215 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
216 "llq_buffer_copy", CTLFLAG_RD,
217 &tx_stats->llq_buffer_copy,
218 "Header copies for llq transaction");
220 /* RX specific stats */
221 rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO,
222 "rx_ring", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX ring");
223 rx_list = SYSCTL_CHILDREN(rx_node);
225 rx_stats = &rx_ring->rx_stats;
227 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
229 &rx_stats->cnt, "Packets received");
230 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
232 &rx_stats->bytes, "Bytes received");
233 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
234 "refil_partial", CTLFLAG_RD,
235 &rx_stats->refil_partial, "Partial refilled mbufs");
236 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
237 "bad_csum", CTLFLAG_RD,
238 &rx_stats->bad_csum, "Bad RX checksum");
239 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
240 "mbuf_alloc_fail", CTLFLAG_RD,
241 &rx_stats->mbuf_alloc_fail, "Failed mbuf allocs");
242 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
243 "mjum_alloc_fail", CTLFLAG_RD,
244 &rx_stats->mjum_alloc_fail, "Failed jumbo mbuf allocs");
245 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
246 "dma_mapping_err", CTLFLAG_RD,
247 &rx_stats->dma_mapping_err, "DMA mapping errors");
248 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
249 "bad_desc_num", CTLFLAG_RD,
250 &rx_stats->bad_desc_num, "Bad descriptor count");
251 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
252 "bad_req_id", CTLFLAG_RD,
253 &rx_stats->bad_req_id, "Bad request id count");
254 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
255 "empty_rx_ring", CTLFLAG_RD,
256 &rx_stats->empty_rx_ring, "RX descriptors depletion count");
259 /* Stats read from device */
260 hw_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hw_stats",
261 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics from hardware");
262 hw_list = SYSCTL_CHILDREN(hw_node);
264 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_packets", CTLFLAG_RD,
265 &hw_stats->rx_packets, "Packets received");
266 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_packets", CTLFLAG_RD,
267 &hw_stats->tx_packets, "Packets transmitted");
268 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_bytes", CTLFLAG_RD,
269 &hw_stats->rx_bytes, "Bytes received");
270 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_bytes", CTLFLAG_RD,
271 &hw_stats->tx_bytes, "Bytes transmitted");
272 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_drops", CTLFLAG_RD,
273 &hw_stats->rx_drops, "Receive packet drops");
274 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_drops", CTLFLAG_RD,
275 &hw_stats->tx_drops, "Transmit packet drops");
277 /* ENA Admin queue stats */
278 admin_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "admin_stats",
279 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA Admin Queue statistics");
280 admin_list = SYSCTL_CHILDREN(admin_node);
/*
 * Unlike the entries above, the admin statistics are registered with
 * SYSCTL_ADD_U64 rather than SYSCTL_ADD_COUNTER_U64.
 */
282 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "aborted_cmd", CTLFLAG_RD,
283 &admin_stats->aborted_cmd, 0, "Aborted commands");
/*
 * NOTE(review): the OID name below is spelled "sumbitted_cmd" while the
 * field is submitted_cmd. The name is user-visible, so renaming it would
 * change the published sysctl path - confirm before fixing.
 */
284 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "sumbitted_cmd", CTLFLAG_RD,
285 &admin_stats->submitted_cmd, 0, "Submitted commands");
286 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "completed_cmd", CTLFLAG_RD,
287 &admin_stats->completed_cmd, 0, "Completed commands");
288 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "out_of_space", CTLFLAG_RD,
289 &admin_stats->out_of_space, 0, "Queue out of space");
290 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "no_completion", CTLFLAG_RD,
291 &admin_stats->no_completion, 0, "Commands not completed");
/*
 * Registers three procedure-backed, read-write OIDs under the device's
 * sysctl tree: "buf_ring_size", "rx_queue_size" and "io_queues_nb". Each
 * one passes the adapter to its handler, which reads it back from arg1.
 * NOTE(review): the declaration and assignment of `dev` are not visible in
 * this excerpt.
 * NOTE(review): the OIDs are typed CTLTYPE_U32 but use the "I" format
 * string; sysctl(9) lists "IU" for unsigned int - confirm whether the
 * mismatch matters to consumers.
 */
295 ena_sysctl_add_tuneables(struct ena_adapter *adapter)
299 struct sysctl_ctx_list *ctx;
300 struct sysctl_oid *tree;
301 struct sysctl_oid_list *child;
305 ctx = device_get_sysctl_ctx(dev);
306 tree = device_get_sysctl_tree(dev);
307 child = SYSCTL_CHILDREN(tree);
309 /* Tuneable number of buffers in the buf-ring (drbr) */
310 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "buf_ring_size",
311 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
312 ena_sysctl_buf_ring_size, "I",
313 "Size of the Tx buffer ring (drbr).");
315 /* Tuneable number of the Rx ring size */
316 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_queue_size",
317 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
318 ena_sysctl_rx_queue_size, "I",
319 "Size of the Rx ring. The size should be a power of 2.");
321 /* Tuneable number of IO queues */
322 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "io_queues_nb",
323 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
324 ena_sysctl_io_queues_nb, "I", "Number of IO queues.");
/*
 * Sysctl handler for "buf_ring_size". Reports adapter->buf_ring_size and,
 * when a new value is supplied, rejects zero or non-power-of-2 sizes,
 * logs the request, and calls ena_update_buf_ring_size() only if the value
 * differs from the current one.
 * NOTE(review): the local declarations, the return statements and the
 * `else` joining the last two branches are not visible in this excerpt.
 * NOTE(review): this handler uses sysctl_handle_int() whereas
 * ena_sysctl_rx_queue_size() uses sysctl_handle_32() for an OID of the same
 * declared type - confirm val's type makes the two equivalent.
 */
329 ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS)
331 struct ena_adapter *adapter = arg1;
/* Wire the caller's output buffer before copying the current value out. */
336 error = sysctl_wire_old_buffer(req, sizeof(val));
338 val = adapter->buf_ring_size;
339 error = sysctl_handle_int(oidp, &val, 0, req);
/* A NULL req->newptr means no new value was written (read-only access). */
341 if (error != 0 || req->newptr == NULL)
/* Validation: the new size must be a non-zero power of 2. */
344 if (!powerof2(val) || val == 0) {
345 device_printf(adapter->pdev,
346 "Requested new Tx buffer ring size (%u) is not a power of 2\n",
/*
 * NOTE(review): the message below formats val and buf_ring_size with %d,
 * while the other messages in this handler use %u for the same values.
 */
351 if (val != adapter->buf_ring_size) {
352 device_printf(adapter->pdev,
353 "Requested new Tx buffer ring size: %d. Old size: %d\n",
354 val, adapter->buf_ring_size);
356 error = ena_update_buf_ring_size(adapter, val);
358 device_printf(adapter->pdev,
359 "New Tx buffer ring size is the same as already used: %u\n",
360 adapter->buf_ring_size);
/*
 * Sysctl handler for "rx_queue_size". Reports
 * adapter->requested_rx_ring_size and, when a new value is supplied,
 * checks that it lies within [ENA_MIN_RING_SIZE, adapter->max_rx_ring_size]
 * and is a power of 2. If it differs from the current request, it calls
 * ena_update_queue_size() with the unchanged Tx size and the new Rx size.
 * NOTE(review): the local declarations, the return statements and the
 * `else` joining the last two branches are not visible in this excerpt.
 */
367 ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS)
369 struct ena_adapter *adapter = arg1;
/* Wire the caller's output buffer before copying the current value out. */
374 error = sysctl_wire_old_buffer(req, sizeof(val));
376 val = adapter->requested_rx_ring_size;
377 error = sysctl_handle_32(oidp, &val, 0, req);
/* A NULL req->newptr means no new value was written (read-only access). */
379 if (error != 0 || req->newptr == NULL)
/* Range check against the compile-time minimum and the adapter maximum. */
382 if (val < ENA_MIN_RING_SIZE || val > adapter->max_rx_ring_size) {
383 device_printf(adapter->pdev,
384 "Requested new Rx queue size (%u) is out of range: [%u, %u]\n",
385 val, ENA_MIN_RING_SIZE, adapter->max_rx_ring_size);
389 /* Check if the parameter is power of 2 */
390 if (!powerof2(val)) {
391 device_printf(adapter->pdev,
392 "Requested new Rx queue size (%u) is not a power of 2\n",
/* Only touch the queues when the requested size actually changes. */
397 if (val != adapter->requested_rx_ring_size) {
398 device_printf(adapter->pdev,
399 "Requested new Rx queue size: %u. Old size: %u\n",
400 val, adapter->requested_rx_ring_size);
402 error = ena_update_queue_size(adapter,
403 adapter->requested_tx_ring_size, val);
405 device_printf(adapter->pdev,
406 "New Rx queue size is the same as already used: %u\n",
407 adapter->requested_rx_ring_size);
414 * Change number of effectively used IO queues adapter->num_io_queues
/*
 * Sysctl handler for "io_queues_nb". Reports adapter->num_io_queues and,
 * when a new value is supplied, logs and rejects requests that are zero or
 * exceed (adapter->msix_vecs - ENA_ADMIN_MSIX_VEC). A request equal to the
 * current value is only logged; a different one is passed to
 * ena_update_io_queue_nb().
 * NOTE(review): the local declarations, the condition guarding the
 * "is zero" message, the return statements and the `else` branches are not
 * visible in this excerpt, and the function may continue past its last
 * visible line.
 * NOTE(review): sysctl_handle_int() is used although the OID is registered
 * as CTLTYPE_U32 - confirm tmp's declared type.
 */
417 ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS)
419 struct ena_adapter *adapter = arg1;
/* Wire the caller's output buffer before copying the current value out. */
423 error = sysctl_wire_old_buffer(req, sizeof(tmp));
425 tmp = adapter->num_io_queues;
426 error = sysctl_handle_int(oidp, &tmp, 0, req);
/* A NULL req->newptr means no new value was written (read-only access). */
428 if (error != 0 || req->newptr == NULL)
432 device_printf(adapter->pdev,
433 "Requested number of IO queues is zero\n");
438 * The adapter::max_num_io_queues is the HW capability. The system
439 * resources availability may potentially be a tighter limit. Therefore
440 * the relation `adapter::max_num_io_queues >= adapter::msix_vecs`
441 * always holds true, while the `adapter::msix_vecs` is variable across
442 * device reset (`ena_destroy_device()` + `ena_restore_device()`).
444 if (tmp > (adapter->msix_vecs - ENA_ADMIN_MSIX_VEC)) {
445 device_printf(adapter->pdev,
446 "Requested number of IO queues is higher than maximum "
447 "allowed (%u)\n", adapter->msix_vecs - ENA_ADMIN_MSIX_VEC);
450 if (tmp == adapter->num_io_queues) {
451 device_printf(adapter->pdev,
452 "Requested number of IO queues is equal to current value "
453 "(%u)\n", adapter->num_io_queues);
455 device_printf(adapter->pdev,
456 "Requested new number of IO queues: %u, current value: "
457 "%u\n", tmp, adapter->num_io_queues);
459 error = ena_update_io_queue_nb(adapter, tmp);