4 * Copyright (c) 2015-2019 Amazon.com, Inc. or its affiliates.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 #include <sys/types.h>
39 #include "ena-com/ena_com.h"
40 #include "ena-com/ena_eth_com.h"
/* Driver version components; combined into DRV_MODULE_VERSION below. */
42 #define DRV_MODULE_VER_MAJOR 2
43 #define DRV_MODULE_VER_MINOR 1
44 #define DRV_MODULE_VER_SUBMINOR 1
/* Short driver/module name string. */
46 #define DRV_MODULE_NAME "ena"
/*
 * Version string of the form "MAJOR.MINOR.SUBMINOR", assembled from the
 * components above with "." separators. It is only defined here when the
 * build has not already provided DRV_MODULE_VERSION.
 * NOTE(review): relies on __XSTRING expanding its argument before
 * stringifying it (as FreeBSD's <sys/cdefs.h> does) - confirm that header
 * is in scope wherever this macro is expanded.
 */
48 #ifndef DRV_MODULE_VERSION
49 #define DRV_MODULE_VERSION \
50 __XSTRING(DRV_MODULE_VER_MAJOR) "." \
51 __XSTRING(DRV_MODULE_VER_MINOR) "." \
52 __XSTRING(DRV_MODULE_VER_SUBMINOR)
/* Human-readable device name and short description strings. */
54 #define DEVICE_NAME "Elastic Network Adapter (ENA)"
55 #define DEVICE_DESC "ENA adapter"
57 /*
 * Calculate a DMA mask with the low (x) bits set. The DMA width for ena
 * cannot exceed 48, so the shift count always stays below the width of
 * unsigned long long and the shift is safe.
 */
58 #define ENA_DMA_BIT_MASK(x) ((1ULL << (x)) - 1ULL)
60 /* 1 for AENQ + ADMIN */
61 #define ENA_ADMIN_MSIX_VEC 1
/* Total vector count: the admin/AENQ vector plus one per IO queue. */
62 #define ENA_MAX_MSIX_VEC(io_queues) (ENA_ADMIN_MSIX_VEC + (io_queues))
/*
 * NOTE(review): presumably the maximum number of DMA segments per mapping -
 * confirm at the DMA tag creation site.
 */
67 #define ENA_BUS_DMA_SEGS 32
/*
 * Default sizes. Both values are powers of two.
 * NOTE(review): presumably the defaults for a ring's buf_ring_size and
 * ring_size respectively - confirm against the initialization code.
 */
69 #define ENA_DEFAULT_BUF_RING_SIZE 4096
71 #define ENA_DEFAULT_RING_SIZE 1024
74 * Refill Rx queue when number of required descriptors is above
75 * QUEUE_SIZE / ENA_RX_REFILL_THRESH_DIVIDER or ENA_RX_REFILL_THRESH_PACKET
77 #define ENA_RX_REFILL_THRESH_DIVIDER 8
78 #define ENA_RX_REFILL_THRESH_PACKET 256
/* Size of the interrupt name buffer (char name[ENA_IRQNAME_SIZE]). */
80 #define ENA_IRQNAME_SIZE 40
/*
 * Maximum number of buffers per packet. Sizes the per-packet buffer arrays
 * (bufs[], ena_bufs[], and the netmap socket_buf_idx[]/map_seg[] arrays).
 */
82 #define ENA_PKT_MAX_BUFS 19
/* RSS indirection table: 2^7 = 128 entries (sizes rss_ind_tbl[]). */
84 #define ENA_RX_RSS_TABLE_LOG_SIZE 7
85 #define ENA_RX_RSS_TABLE_SIZE (1 << ENA_RX_RSS_TABLE_LOG_SIZE)
/* NOTE(review): presumably the RSS hash key length in bytes - confirm. */
87 #define ENA_HASH_KEY_SIZE 40
/* Frame length bounds. NOTE(review): units presumably bytes - confirm. */
89 #define ENA_MAX_FRAME_LEN 10000
90 #define ENA_MIN_FRAME_LEN 60
/*
 * Tx resume threshold: two more than the maximum buffers per packet (21).
 * NOTE(review): presumably the free-descriptor count required before a
 * stopped Tx queue is resumed - confirm against the Tx cleanup path.
 */
92 #define ENA_TX_RESUME_THRESH (ENA_PKT_MAX_BUFS + 2)
/*
 * NOTE(review): presumably the number of queued Tx packets after which the
 * doorbell is written - confirm against the Tx path.
 */
94 #define DB_THRESHOLD 64
98 * TX budget for cleaning. It should be half of the RX budget to reduce the
99 * amount of TCP retransmissions.
101 #define TX_BUDGET 128
102 /* RX cleanup budget (twice TX_BUDGET). -1 stands for infinity. */
103 #define RX_BUDGET 256
105 * How many times we can repeat cleanup in the io irq handling routine if the
106 * RX or TX budget was depleted.
108 #define CLEAN_BUDGET 8
/*
 * NOTE(review): presumably interrupt moderation intervals for Rx and Tx;
 * the units are not visible in this header - confirm.
 */
110 #define RX_IRQ_INTERVAL 20
111 #define TX_IRQ_INTERVAL 50
/* Minimum MTU. NOTE(review): units presumably bytes - confirm. */
113 #define ENA_MIN_MTU 128
/* 65536 = 64 KiB. NOTE(review): presumably the maximum TSO size - confirm. */
115 #define ENA_TSO_MAXSIZE 65536
/*
 * Flag in bit 0. NOTE(review): presumably signals that MMIO register reads
 * are disabled - confirm where the flag is tested.
 */
117 #define ENA_MMIO_DISABLE_REG_READ BIT(0)
/*
 * Advance a ring index by one with wrap-around. The wrap is performed by
 * masking with (ring_size - 1), so the result is only correct when
 * ring_size is a power of two.
 */
119 #define ENA_TX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))
121 #define ENA_RX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))
/*
 * Queue pair q maps to two consecutive IO queue indices: the Tx queue takes
 * the even one (2q) and the Rx queue the odd one (2q + 1).
 */
123 #define ENA_IO_TXQ_IDX(q) (2 * (q))
124 #define ENA_IO_RXQ_IDX(q) (2 * (q) + 1)
/*
 * IRQ index layout: index 0 is the management interrupt and IO queue q uses
 * index ENA_IO_IRQ_FIRST_IDX + q, i.e. 1 + q.
 */
126 #define ENA_MGMNT_IRQ_IDX 0
127 #define ENA_IO_IRQ_FIRST_IDX 1
128 #define ENA_IO_IRQ_IDX(q) (ENA_IO_IRQ_FIRST_IDX + (q))
/*
 * NOTE(review): presumably the limit for a ring's no_interrupt_event_cnt
 * before the missing interrupt is treated as an error - confirm.
 */
130 #define ENA_MAX_NO_INTERRUPT_ITERATIONS 3
133 * ENA device should send keep alive msg every 1 sec.
134 * We wait for 6 sec just to be on the safe side.
136 #define DEFAULT_KEEP_ALIVE_TO (SBT_1S * 6)
138 /*
 * Time (5 seconds, expressed in SBT_1S units) before concluding the
 * transmitter is hung.
 */
139 #define DEFAULT_TX_CMP_TO (SBT_1S * 5)
141 /* Number of queues to check for missing Tx completions per timer tick */
142 #define DEFAULT_TX_MONITORED_QUEUES (4)
144 /* Max number of timed-out packets before device reset */
145 #define DEFAULT_TX_CMP_THRESHOLD (128)
148 * Supported PCI vendor and device IDs
/* PCI vendor ID for Amazon. */
150 #define PCI_VENDOR_ID_AMAZON 0x1d0f
/*
 * PCI device IDs: a PF and a VF variant, each with an LLQ counterpart.
 * NOTE(review): PF/VF presumably stand for physical/virtual function and
 * LLQ for the low-latency queue mode - inferred from the names, confirm.
 */
152 #define PCI_DEV_ID_ENA_PF 0x0ec2
153 #define PCI_DEV_ID_ENA_LLQ_PF 0x1ec2
154 #define PCI_DEV_ID_ENA_VF 0xec20
155 #define PCI_DEV_ID_ENA_LLQ_VF 0xec21
158 * Flags indicating current ENA driver state
161 ENA_FLAG_DEVICE_RUNNING,
164 ENA_FLAG_MSIX_ENABLED,
165 ENA_FLAG_TRIGGER_RESET,
166 ENA_FLAG_ONGOING_RESET,
167 ENA_FLAG_DEV_UP_BEFORE_RESET,
169 ENA_FLAGS_NUMBER = ENA_FLAG_RSS_ACTIVE
/*
 * Driver state is kept in a bitset type (ena_state_t) declared with
 * ENA_FLAGS_NUMBER bits and indexed by the ENA_FLAG_* enumerators.
 * NOTE(review): ENA_FLAGS_NUMBER is defined as equal to ENA_FLAG_RSS_ACTIVE
 * rather than one past it. If ENA_FLAG_RSS_ACTIVE is itself used as a bit
 * index, the declared set size is one short of the number of flags -
 * confirm, and consider making ENA_FLAGS_NUMBER the flag count.
 */
172 BITSET_DEFINE(_ena_state, ENA_FLAGS_NUMBER);
173 typedef struct _ena_state ena_state_t;
/*
 * Accessors for the 'flags' bitset of an adapter. Setting and clearing go
 * through the atomic bitset operations; ENA_FLAG_ZERO and ENA_FLAG_ISSET use
 * the plain BIT_ZERO / BIT_ISSET operations.
 */
175 #define ENA_FLAG_ZERO(adapter) \
176 BIT_ZERO(ENA_FLAGS_NUMBER, &(adapter)->flags)
177 #define ENA_FLAG_ISSET(bit, adapter) \
178 BIT_ISSET(ENA_FLAGS_NUMBER, (bit), &(adapter)->flags)
179 #define ENA_FLAG_SET_ATOMIC(bit, adapter) \
180 BIT_SET_ATOMIC(ENA_FLAGS_NUMBER, (bit), &(adapter)->flags)
181 #define ENA_FLAG_CLEAR_ATOMIC(bit, adapter) \
182 BIT_CLR_ATOMIC(ENA_FLAGS_NUMBER, (bit), &(adapter)->flags)
189 typedef struct _ena_vendor_info_t {
196 /* Interrupt resources */
197 struct resource *res;
198 driver_filter_t *handler;
204 char name[ENA_IRQNAME_SIZE];
208 struct ena_adapter *adapter;
209 struct ena_ring *tx_ring;
210 struct ena_ring *rx_ring;
212 struct task cleanup_task;
213 struct taskqueue *cleanup_tq;
219 struct ena_calc_queue_size_ctx {
220 struct ena_com_dev_get_features_ctx *get_feat_ctx;
221 struct ena_com_dev *ena_dev;
223 uint16_t rx_queue_size;
224 uint16_t tx_queue_size;
225 uint16_t max_tx_sgl_size;
226 uint16_t max_rx_sgl_size;
230 struct ena_netmap_tx_info {
231 uint32_t socket_buf_idx[ENA_PKT_MAX_BUFS];
232 bus_dmamap_t map_seg[ENA_PKT_MAX_BUFS];
233 unsigned int sockets_used;
237 struct ena_tx_buffer {
239 /* # of ena desc for this specific mbuf
240 * (includes data desc and metadata desc) */
241 unsigned int tx_descs;
242 /* # of buffers used by this mbuf */
243 unsigned int num_of_bufs;
247 /* Used to detect missing tx packets */
248 struct bintime timestamp;
252 struct ena_netmap_tx_info nm_info;
253 #endif /* DEV_NETMAP */
255 struct ena_com_buf bufs[ENA_PKT_MAX_BUFS];
256 } __aligned(CACHE_LINE_SIZE);
258 struct ena_rx_buffer {
261 struct ena_com_buf ena_buf;
263 uint32_t netmap_buf_idx;
264 #endif /* DEV_NETMAP */
265 } __aligned(CACHE_LINE_SIZE);
267 struct ena_stats_tx {
270 counter_u64_t prepare_ctx_err;
271 counter_u64_t dma_mapping_err;
272 counter_u64_t doorbells;
273 counter_u64_t missing_tx_comp;
274 counter_u64_t bad_req_id;
275 counter_u64_t collapse;
276 counter_u64_t collapse_err;
277 counter_u64_t queue_wakeup;
278 counter_u64_t queue_stop;
279 counter_u64_t llq_buffer_copy;
282 struct ena_stats_rx {
285 counter_u64_t refil_partial;
286 counter_u64_t bad_csum;
287 counter_u64_t mjum_alloc_fail;
288 counter_u64_t mbuf_alloc_fail;
289 counter_u64_t dma_mapping_err;
290 counter_u64_t bad_desc_num;
291 counter_u64_t bad_req_id;
292 counter_u64_t empty_rx_ring;
296 /* Holds the empty requests for TX/RX out of order completions */
298 uint16_t *free_tx_ids;
299 uint16_t *free_rx_ids;
301 struct ena_com_dev *ena_dev;
302 struct ena_adapter *adapter;
303 struct ena_com_io_cq *ena_com_io_cq;
304 struct ena_com_io_sq *ena_com_io_sq;
308 /* Determines if device will use LLQ or normal mode for TX */
309 enum ena_admin_placement_policy_type tx_mem_queue_type;
310 /* The maximum length the driver can push to the device (For LLQ) */
311 uint8_t tx_max_header_size;
313 bool first_interrupt;
314 uint16_t no_interrupt_event_cnt;
316 struct ena_com_rx_buf_info ena_bufs[ENA_PKT_MAX_BUFS];
319 * Fields used for Adaptive Interrupt Modulation - to be implemented in
320 * future releases
322 uint32_t smoothed_interval;
323 enum ena_intr_moder_level moder_tbl_idx;
/*
 * NOTE(review): presumably the producer (next_to_use) and consumer
 * (next_to_clean) indices into the ring - confirm against the datapath.
 */
328 uint16_t next_to_use;
329 uint16_t next_to_clean;
332 struct ena_tx_buffer *tx_buffer_info; /* context of tx packet */
333 struct ena_rx_buffer *rx_buffer_info; /* context of rx packet */
335 int ring_size; /* number of tx/rx_buffer_info's entries */
337 struct buf_ring *br; /* only for TX */
338 uint32_t buf_ring_size;
344 struct task enqueue_task;
345 struct taskqueue *enqueue_tq;
349 struct ena_stats_tx tx_stats;
350 struct ena_stats_rx rx_stats;
355 /* For Tx ring to indicate if it's running or not */
359 /* How many packets are sent in one Tx loop, used for doorbells */
363 uint8_t *push_buf_intermediate_buf;
367 #endif /* DEV_NETMAP */
368 } __aligned(CACHE_LINE_SIZE);
370 struct ena_stats_dev {
371 counter_u64_t wd_expired;
372 counter_u64_t interface_up;
373 counter_u64_t interface_down;
374 counter_u64_t admin_q_pause;
377 struct ena_hw_stats {
378 counter_u64_t rx_packets;
379 counter_u64_t tx_packets;
381 counter_u64_t rx_bytes;
382 counter_u64_t tx_bytes;
384 counter_u64_t rx_drops;
387 /* Board specific private data structure */
389 struct ena_com_dev *ena_dev;
391 /* OS defined structs */
394 struct ifmedia media;
397 struct resource *memory;
398 struct resource *registers;
400 struct mtx global_mtx;
404 struct msix_entry *msix_entries;
407 /* DMA tags used throughout the driver adapter for Tx and Rx */
408 bus_dma_tag_t tx_buf_tag;
409 bus_dma_tag_t rx_buf_tag;
414 uint16_t max_tx_sgl_size;
415 uint16_t max_rx_sgl_size;
417 uint32_t tx_offload_cap;
419 /* Tx fast path data */
422 unsigned int tx_ring_size;
423 unsigned int rx_ring_size;
425 uint16_t buf_ring_size;
428 uint8_t rss_ind_tbl[ENA_RX_RSS_TABLE_SIZE];
430 uint8_t mac_addr[ETHER_ADDR_LEN];
435 /* Queue will represent one TX and one RX ring */
436 struct ena_que que[ENA_MAX_NUM_IO_QUEUES]
437 __aligned(CACHE_LINE_SIZE);
440 struct ena_ring tx_ring[ENA_MAX_NUM_IO_QUEUES]
441 __aligned(CACHE_LINE_SIZE);
444 struct ena_ring rx_ring[ENA_MAX_NUM_IO_QUEUES]
445 __aligned(CACHE_LINE_SIZE);
447 struct ena_irq irq_tbl[ENA_MAX_MSIX_VEC(ENA_MAX_NUM_IO_QUEUES)];
450 struct callout timer_service;
451 sbintime_t keep_alive_timestamp;
452 uint32_t next_monitored_tx_qid;
453 struct task reset_task;
454 struct taskqueue *reset_tq;
456 sbintime_t keep_alive_timeout;
457 sbintime_t missing_tx_timeout;
458 uint32_t missing_tx_max_queues;
459 uint32_t missing_tx_threshold;
462 struct ena_stats_dev dev_stats;
463 struct ena_hw_stats hw_stats;
465 enum ena_regs_reset_reason_types reset_reason;
/*
 * Helpers for a ring's ring_mtx mutex: lock, try-lock and unlock. Each takes
 * a pointer to the ring and forwards &ring->ring_mtx to mtx_lock(),
 * mtx_trylock() or mtx_unlock(); ENA_RING_MTX_TRYLOCK yields the value
 * returned by mtx_trylock().
 */
468 #define ENA_RING_MTX_LOCK(_ring) mtx_lock(&(_ring)->ring_mtx)
469 #define ENA_RING_MTX_TRYLOCK(_ring) mtx_trylock(&(_ring)->ring_mtx)
470 #define ENA_RING_MTX_UNLOCK(_ring) mtx_unlock(&(_ring)->ring_mtx)
472 static inline int ena_mbuf_count(struct mbuf *mbuf)
476 while ((mbuf = mbuf->m_next) != NULL)
/*
 * Prototypes of adapter and ring routines whose definitions are not part of
 * this header.
 * NOTE(review): the summaries below are inferred from the names and
 * signatures only - confirm against the implementation:
 *   ena_up / ena_down   - presumably bring the adapter up / down.
 *   ena_restore_device  - presumably re-initializes the device (e.g. after
 *                         a reset); returns an int status.
 *   ena_destroy_device  - presumably tears the device down; the meaning of
 *                         the bool argument is not visible here.
 *   ena_refill_rx_bufs  - presumably refills an Rx ring with up to the given
 *                         number of buffers; the meaning of the int return
 *                         value is not visible here.
 */
482 int ena_up(struct ena_adapter *);
483 void ena_down(struct ena_adapter *);
484 int ena_restore_device(struct ena_adapter *);
485 void ena_destroy_device(struct ena_adapter *, bool);
486 int ena_refill_rx_bufs(struct ena_ring *, uint32_t);
489 validate_rx_req_id(struct ena_ring *rx_ring, uint16_t req_id)
491 if (likely(req_id < rx_ring->ring_size))
494 device_printf(rx_ring->adapter->pdev, "Invalid rx req_id: %hu\n",
496 counter_u64_add(rx_ring->rx_stats.bad_req_id, 1);
498 /* Trigger device reset */
499 if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, rx_ring->adapter))) {
500 rx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
501 ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, rx_ring->adapter);
507 #endif /* !(ENA_H) */